Example #1
    def __call__(self, data):
        if isinstance(data, list):
            data = [self._process(d) for d in tq(data)]
            data = list(itertools.chain(*data))  # flatten the 2-D list of lists
        else:
            data = self._process(data)
        return data
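Throughout these examples, `tq` is the usual alias for tqdm (`from tqdm.auto import tqdm as tq`, as Example #5 imports it). A minimal, self-contained sketch of the same dispatch-and-flatten pattern; the `Pipeline` class and its `_process` body are stand-ins, not the original code:

import itertools
from tqdm.auto import tqdm as tq

class Pipeline:
    def _process(self, d):
        return [d, d]  # hypothetical per-item expansion

    def __call__(self, data):
        if isinstance(data, list):
            data = [self._process(d) for d in tq(data)]
            data = list(itertools.chain(*data))  # flatten the 2-D list of lists
        else:
            data = self._process(data)
        return data

print(Pipeline()([1, 2, 3]))  # [1, 1, 2, 2, 3, 3]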
Example #2
def download_file(url: str, path: str, verbose: bool = False) -> None:
    """
    Download file with progressbar

    Usage:
        download_file('http://web4host.net/5MB.zip', 'data/')
    """
    if not os.path.exists(path):
        os.makedirs(path)
    local_filename = os.path.join(path, url.split('/')[-1])
    r = requests.get(url, stream=True)
    file_size = int(r.headers.get('Content-Length', 0))
    chunk_size = 1024
    num_bars = int(file_size / chunk_size)
    if verbose:
        print(dict(file_size=file_size))
        print(dict(num_bars=num_bars))

    if not os.path.exists(local_filename):
        with open(local_filename, 'wb') as fp:
            for chunk in tq(
                r.iter_content(chunk_size=chunk_size),
                total=num_bars,
                unit='KB',
                desc=local_filename,
                leave=True  # progressbar stays
            ):
                fp.write(chunk)  # type: ignore

    if '.zip' in local_filename:
        if os.path.exists(local_filename):
            with zipfile.ZipFile(local_filename, 'r') as zip_ref:
                zip_ref.extractall(path)
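The snippet above assumes these imports (a hedged reconstruction, with `tq` as the tqdm alias); since `path` is a required argument, a call looks like:

import os
import zipfile

import requests
from tqdm.auto import tqdm as tq

download_file('http://web4host.net/5MB.zip', 'data/', verbose=True)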
Example #3
    def _process_filenames(self, filenames):
        data_raw_list = []
        data_list = []
        categories_ids = [self.category_ids[cat] for cat in self.categories]
        cat_idx = {cat_id: i for i, cat_id in enumerate(categories_ids)}

        has_pre_transform = self.pre_transform is not None

        id_scan = -1
        for name in tq(filenames):
            cat = name.split(osp.sep)[0]
            if cat not in categories_ids:
                continue
            id_scan += 1
            data = read_txt_array(osp.join(self.raw_dir, name))
            pos = data[:, :3]
            x = data[:, 3:6]
            y = data[:, -1].type(torch.long)
            category = torch.ones(x.shape[0], dtype=torch.long) * cat_idx[cat]
            id_scan_tensor = torch.from_numpy(np.asarray([id_scan])).clone()
            data = Data(pos=pos,
                        x=x,
                        y=y,
                        category=category,
                        id_scan=id_scan_tensor)
            data = SaveOriginalPosId()(data)
            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            data_raw_list.append(data.clone() if has_pre_transform else data)
            if has_pre_transform:
                data = self.pre_transform(data)
                data_list.append(data)
        if not has_pre_transform:
            return [], data_raw_list
        return data_raw_list, data_list
Example #4
def generate(size, **kwargs):
    generator = DataGenerator.from_robbins_dataset(
        diamlims=kwargs["diamlims"],
        ellipse_limit=kwargs["ellipse_limit"],
        arc_lims=kwargs["arc_lims"],
        axis_threshold=kwargs["axis_threshold"],
        fov=kwargs["fov"],
        resolution=kwargs["resolution"],
        filled=kwargs["filled"],
        mask_thickness=kwargs["mask_thickness"],
        instancing=kwargs["instancing"])

    date_dataset = np.empty((size, 3), int)
    images_dataset = np.empty((size, 1, *generator.resolution), np.float32)
    if kwargs["instancing"]:
        masks_dataset = np.empty((size, 1, *generator.resolution), np.int16)
    else:
        masks_dataset = np.empty((size, 1, *generator.resolution), np.bool_)
    position_dataset = np.empty((size, 3, 1), np.float64)
    attitude_dataset = np.empty((size, 3, 3), np.float64)
    sol_incidence_dataset = np.empty((size, 1), np.float16)

    A_craters = []

    for i in tq(range(size), desc="Creating dataset"):
        # NB: randint's upper bound is exclusive, so 13 is needed to include December
        date = dt.date(2021, np.random.randint(1, 13), 1)
        generator.set_random_position()
        generator.scene_time = date
        date_dataset[i] = np.array((date.year, date.month, date.day))

        while not (kwargs["min_sol_incidence"] <=
                   generator.solar_incidence_angle <=
                   kwargs["max_sol_incidence"]):
            generator.set_random_position()  # Generate random position

        position_dataset[i] = generator.position
        sol_incidence_dataset[i] = generator.solar_incidence_angle

        generator.point_nadir()
        if kwargs["randomized_orientation"]:
            # Rotations are incremental (order matters)
            generator.rotate('roll', np.random.randint(0, 360))
            generator.rotate('pitch', np.random.randint(-30, 30))
            generator.rotate('yaw', np.random.randint(-30, 30))

        attitude_dataset[i] = generator.attitude

        image, mask = generator.image_mask_pair()

        masks_dataset[i] = mask[None, None, ...]
        images_dataset[i] = image[None, None, ...]

        if kwargs["save_craters"]:
            A_craters.append(generator.craters_in_image())

    return (images_dataset, masks_dataset, position_dataset, attitude_dataset,
            date_dataset, sol_incidence_dataset, A_craters)
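A hedged call sketch for `generate`; the keys mirror exactly the kwargs the function reads, while every value here is a made-up placeholder:

config = dict(diamlims=(4, 30), ellipse_limit=1.3, arc_lims=(0.0, 1.0),  # placeholder values
              axis_threshold=(5, 100), fov=45, resolution=(256, 256),
              filled=True, mask_thickness=1, instancing=True,
              min_sol_incidence=0, max_sol_incidence=85,
              randomized_orientation=True, save_craters=False)
outputs = generate(1000, **config)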
Example #5
def sample_chains(susceptible,
                  initial_infected,
                  model,
                  daily_ri_values,
                  num_chains=1000,
                  n_workers=None,
                  pool=None,
                  deterministic=False,
                  show_progress=False):

    if n_workers is not None and n_workers > 1 and pool is None:
        pool = initialize_pool(n_workers, np.random.SeedSequence())

    pbar = None
    if show_progress:
        try:
            from tqdm.auto import tqdm as tq
            pbar = tq(total=num_chains)
        except ImportError:
            warnings.warn(
                "Could not import tqdm to show progress; "
                "install it to use the show_progress option"
            )

    simulations = np.zeros(shape=(num_chains, len(daily_ri_values),
                                  len(STATE_NAMES)))
    if pool is None:
        it = (simulation(susceptible,
                         initial_infected,
                         model,
                         daily_ri_values,
                         deterministic=deterministic)
              for _ in range(num_chains))
    else:
        it = pool.imap_unordered(
            _fn_simulation,
            [(susceptible, initial_infected, model.parameters, daily_ri_values,
              model.alphas, model.betas, deterministic)
             for _ in range(num_chains)])

    for i, (st, _) in enumerate(it):
        simulations[i, :, :] = st
        if pbar is not None:
            pbar.update()

    if pbar is not None:
        pbar.close()

    if pool is not None:
        pool.terminate()
    return simulations
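The graceful tqdm fallback used above, isolated into a minimal runnable sketch (the helper name is hypothetical):

import warnings

def iter_with_optional_progress(items, show_progress=False):
    pbar = None
    if show_progress:
        try:
            from tqdm.auto import tqdm as tq
            pbar = tq(total=len(items))
        except ImportError:
            warnings.warn("Could not import tqdm; install it to use show_progress")
    for item in items:
        yield item
        if pbar is not None:
            pbar.update()
    if pbar is not None:
        pbar.close()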
Example #6
def download_data(url: str,
                  path: str = "data/",
                  verbose: bool = False) -> None:
    """
    Download file with progressbar

    # Code taken from: https://gist.github.com/ruxi/5d6803c116ec1130d484a4ab8c00c603
    # __author__  = "github.com/ruxi"
    # __license__ = "MIT"

    Usage:
        download_data('http://web4host.net/5MB.zip')
    """
    if url == "NEED_TO_BE_CREATED":
        raise NotImplementedError

    if not os.path.exists(path):
        os.makedirs(path)
    local_filename = os.path.join(path, url.split('/')[-1])
    # NB: verify=False disables TLS certificate verification for this request
    r = requests.get(url, stream=True, verify=False)
    file_size = int(r.headers.get('Content-Length', 0))
    chunk_size = 1024
    num_bars = int(file_size / chunk_size)
    if verbose:
        print(dict(file_size=file_size))
        print(dict(num_bars=num_bars))

    if not os.path.exists(local_filename):
        with open(local_filename, 'wb') as fp:
            for chunk in tq(
                    r.iter_content(chunk_size=chunk_size),
                    total=num_bars,
                    unit='KB',
                    desc=local_filename,
                    leave=True  # progressbar stays
            ):
                fp.write(chunk)  # type: ignore

    if '.zip' in local_filename:
        if os.path.exists(local_filename):
            with zipfile.ZipFile(local_filename, 'r') as zip_ref:
                zip_ref.extractall(path)
Example #7
    def process_filenames(self, filenames):
        data_list = []
        categories_ids = [self.category_ids[cat] for cat in self.categories]
        cat_idx = {cat_id: i for i, cat_id in enumerate(categories_ids)}

        for name in tq(filenames):
            cat = name.split(osp.sep)[0]
            if cat not in categories_ids:
                continue

            data = read_txt_array(osp.join(self.raw_dir, name))
            pos = data[:, :3]
            x = data[:, 3:6]
            y = data[:, -1].type(torch.long)
            category = torch.ones(x.shape[0], dtype=torch.long) * cat_idx[cat]
            data = Data(pos=pos, x=x, y=y, category=category)
            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)

        return data_list
Example #8
def download_file(url: str, path: str, verbose: bool = False) -> None:
    """
    Download file with progressbar

    Usage:
        download_file('http://web4host.net/5MB.zip', 'data/')
    """
    if not os.path.exists(path):
        os.makedirs(path)
    local_filename = os.path.join(path, url.split('/')[-1])

    if not os.path.exists(local_filename):
        r = requests.get(url, stream=True)
        file_size = int(r.headers.get('Content-Length', 0))
        chunk_size = 1024
        num_bars = int(file_size / chunk_size)
        if verbose:
            logging.info(f'file size: {file_size}\n# bars: {num_bars}')
        with open(local_filename, 'wb') as fp:
            for chunk in tq(
                    r.iter_content(chunk_size=chunk_size),
                    total=num_bars,
                    unit='KB',
                    desc=local_filename,
                    leave=True  # progressbar stays
            ):
                fp.write(chunk)  # type: ignore

    if '.zip' in local_filename:
        if os.path.exists(local_filename):
            with zipfile.ZipFile(local_filename, 'r') as zip_ref:
                zip_ref.extractall(path)
    elif '.tar.gz' in local_filename:
        if os.path.exists(local_filename):
            with tarfile.open(local_filename, 'r') as tar_ref:
                tar_ref.extractall(path)
Example #9
    def process(self):
        if not os.path.exists(self.pre_processed_path):
            train_areas = [f for f in self.folders if str(self.test_area) not in f]
            test_areas = [f for f in self.folders if str(self.test_area) in f]

            train_files = [
                (f, room_name, osp.join(self.raw_dir, f, room_name))
                for f in train_areas
                for room_name in os.listdir(osp.join(self.raw_dir, f))
                if os.path.isdir(osp.join(self.raw_dir, f, room_name))
            ]

            test_files = [
                (f, room_name, osp.join(self.raw_dir, f, room_name))
                for f in test_areas
                for room_name in os.listdir(osp.join(self.raw_dir, f))
                if os.path.isdir(osp.join(self.raw_dir, f, room_name))
            ]

            # Gather data per area
            data_list = [[] for _ in range(6)]
            for (area, room_name, file_path) in tq(train_files + test_files):

                area_num = int(area[-1]) - 1
                if self.debug:
                    read_s3dis_format(file_path, room_name, label_out=True, verbose=self.verbose, debug=self.debug)
                    continue
                else:
                    xyz, rgb, room_labels, room_object_indices = read_s3dis_format(
                        file_path, room_name, label_out=True, verbose=self.verbose, debug=self.debug
                    )

                    rgb_norm = rgb.float() / 255.0
                    data = Data(pos=xyz, y=room_labels, rgb=rgb_norm)
                    if room_name in VALIDATION_ROOMS:
                        data.validation_set = True
                    else:
                        data.validation_set = False

                    if self.keep_instance:
                        data.room_object_indices = room_object_indices

                    if self.pre_filter is not None and not self.pre_filter(data):
                        continue

                    data_list[area_num].append(data)

            raw_areas = cT.PointCloudFusion()(data_list)
            for i, area in enumerate(raw_areas):
                torch.save(area, self.raw_areas_paths[i])

            # Apply pre_transform, writing the results back into data_list
            # (reassigning the loop variable alone would discard them)
            if self.pre_transform is not None:
                for area_datas in data_list:
                    for i, data in enumerate(area_datas):
                        area_datas[i] = self.pre_transform(data)
            torch.save(data_list, self.pre_processed_path)
        else:
            data_list = torch.load(self.pre_processed_path)

        if self.debug:
            return

        train_data_list = {}
        val_data_list = {}
        trainval_data_list = {}
        for i in range(6):
            if i != self.test_area - 1:
                train_data_list[i] = []
                val_data_list[i] = []
                for data in data_list[i]:
                    validation_set = data.validation_set
                    del data.validation_set
                    if validation_set:
                        val_data_list[i].append(data)
                    else:
                        train_data_list[i].append(data)
                trainval_data_list[i] = val_data_list[i] + train_data_list[i]

        train_data_list = list(train_data_list.values())
        val_data_list = list(val_data_list.values())
        trainval_data_list = list(trainval_data_list.values())
        test_data_list = data_list[self.test_area - 1]

        if self.pre_collate_transform:
            log.info("pre_collate_transform ...")
            log.info(self.pre_collate_transform)
            train_data_list = self.pre_collate_transform(train_data_list)
            val_data_list = self.pre_collate_transform(val_data_list)
            test_data_list = self.pre_collate_transform(test_data_list)
            trainval_data_list = self.pre_collate_transform(trainval_data_list)

        self._save_data(train_data_list, val_data_list, test_data_list, trainval_data_list)
Example #10
                             num_workers=2)

    # Confusion matrix
    conf_mat = np.zeros((len(LABELS), len(LABELS)))

    outputs = []
    y_trues = []

    # SVM, RF, NB and HGB
    y_trues_sota = []
    y_preds_svm = []
    y_preds_rf = []
    y_preds_nb = []
    y_preds_hgb = []

    for idx, (image, y_true, _) in tq(enumerate(test_loader),
                                      total=len(test_loader)):
        if sota:
            # apply SVM, RF, NB and HGB methods
            features = s2_to_ndvifdi(image.squeeze(0).numpy())
            y_trues_sota.append(y_true.squeeze(0))

            # svm
            y_pred_svm = clf_svm.predict(features.reshape(2, -1).T).reshape(
                128, 128)
            y_preds_svm.append(y_pred_svm)

            # rf
            y_pred_rf = clf_rf.predict(features.reshape(2, -1).T).reshape(
                128, 128)
            y_preds_rf.append(y_pred_rf)
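`enumerate()` hides the length of the underlying loader, so `total=` must be passed explicitly for tqdm to render a full bar, as the loop above does. In isolation:

from tqdm.auto import tqdm as tq

items = ["a", "b", "c"]
for idx, item in tq(enumerate(items), total=len(items)):
    pass  # per-item work goes here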
Example #11
def download_data(url: str,
                  path: str = "data/",
                  verbose: bool = False) -> None:
    """Download file with progressbar.

    # Code adapted from: https://gist.github.com/ruxi/5d6803c116ec1130d484a4ab8c00c603
    # __author__  = "github.com/ruxi"
    # __license__ = "MIT"

    Examples
    ________

    .. doctest::

        >>> import os
        >>> from flash.core.data.utils import download_data
        >>> download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "./data")
        >>> os.listdir("./data")  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
        [...]
    """
    # Disable warning about making an insecure request
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    if not os.path.exists(path):
        os.makedirs(path)
    local_filename = os.path.join(path, url.split("/")[-1])
    r = requests.get(url, stream=True, verify=False)
    file_size = int(r.headers.get("Content-Length", 0))
    chunk_size = 1024
    num_bars = int(file_size / chunk_size)
    if verbose:
        print(dict(file_size=file_size))
        print(dict(num_bars=num_bars))

    if not os.path.exists(local_filename):
        with open(local_filename, "wb") as fp:
            for chunk in tq(
                    r.iter_content(chunk_size=chunk_size),
                    total=num_bars,
                    unit="KB",
                    desc=local_filename,
                    leave=True,  # progressbar stays
            ):
                fp.write(chunk)  # type: ignore

    def extract_tarfile(file_path: str, extract_path: str, mode: str):
        if os.path.exists(file_path):
            with tarfile.open(file_path, mode=mode) as tar_ref:
                for member in tar_ref.getmembers():
                    try:
                        tar_ref.extract(member,
                                        path=extract_path,
                                        set_attrs=False)
                    except PermissionError:
                        raise PermissionError(
                            f"Could not extract tar file {file_path}")

    if ".zip" in local_filename:
        if os.path.exists(local_filename):
            with zipfile.ZipFile(local_filename, "r") as zip_ref:
                zip_ref.extractall(path)
    elif local_filename.endswith(".tar.gz") or local_filename.endswith(".tgz"):
        extract_tarfile(local_filename, path, "r:gz")
    elif local_filename.endswith(".tar.bz2") or local_filename.endswith(
            ".tbz"):
        extract_tarfile(local_filename, path, "r:bz2")
Example #12
train_loss_list = []
valid_loss_list = []
dice_score_list = []
lr_rate_list = []
valid_loss_min = np.inf  # track change in validation loss
for epoch in range(1, n_epochs+1):

    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    dice_score = 0.0
    ###################
    # train the model #
    ###################
    model.train()
    bar = tq(train_loader, postfix={"train_loss":0.0})
    for data, target in bar:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the batch loss
        loss = criterion(output, target)
        #print(loss)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
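A minimal sketch of the live-postfix pattern this loop uses (its `set_postfix` counterpart appears in Example #14); the loss here is a stand-in value:

from tqdm.auto import tqdm as tq

bar = tq(range(100), postfix={"train_loss": 0.0})
for step in bar:
    loss = 1.0 / (step + 1)  # stand-in for a real batch loss
    bar.set_postfix(ordered_dict={"train_loss": loss})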
Example #13
    def performance_metrics(self, iou_threshold=0.5, confidence_thresholds=None, distance_threshold=None):

        if confidence_thresholds is None:
            confidence_thresholds = torch.arange(start=0.05, end=0.99, step=0.05).to(self.device)

        loader = DataLoader(self.ds, batch_size=32, shuffle=True, num_workers=0, collate_fn=collate_fn)

        bar = tq(loader, desc="Testing",
                 postfix={
                     "IoU": 0.,
                     "GA_distance": 0.,
                     "precision": 0.,
                     "recall": 0.,
                     "f1_score": 0.
                 })

        precision = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)
        recall = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)
        f1 = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)
        iou = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)
        dist = torch.zeros((len(loader), loader.batch_size, len(confidence_thresholds)), device=self.device)

        for batch, (images, targets_all) in enumerate(bar):
            images = list(image.to(self.device) for image in images)
            targets_all = [{k: v.to(self.device) for k, v in t.items()} for t in targets_all]

            pred_all = self._model(images)

            for i, (pred, targets) in enumerate(zip(pred_all, targets_all)):
                for j, confidence_threshold in enumerate(confidence_thresholds):
                    (precision[batch, i, j], recall[batch, i, j], f1[batch, i, j],
                     iou[batch, i, j], dist[batch, i, j]) = detection_metrics(
                         pred,
                         targets,
                         iou_threshold=iou_threshold,
                         confidence_threshold=confidence_threshold,
                         distance_threshold=distance_threshold)

            postfix = dict(
                IoU=iou[batch].mean().item(),
                GA_distance=dist[batch].mean().item(),
                precision=precision[batch].mean().item(),
                recall=recall[batch].mean().item(),
                f1_score=f1[batch].mean().item()
            )
            bar.set_postfix(ordered_dict=postfix)

        del images, targets_all

        # Per-threshold means over the non-zero entries only; note that these
        # *_out values are computed but the unfiltered means below are returned.
        precision_out = torch.zeros(len(confidence_thresholds))
        recall_out = torch.zeros(len(confidence_thresholds))
        f1_out = torch.zeros(len(confidence_thresholds))
        iou_out = torch.zeros(len(confidence_thresholds))
        dist_out = torch.zeros(len(confidence_thresholds))

        for i in range(len(confidence_thresholds)):
            precision_out[i], recall_out[i], f1_out[i], iou_out[i], dist_out[i] = map(
                lambda x: x[..., i][x[..., i] > 0.].mean(),
                (precision, recall, f1, iou, dist)
            )

        precision, recall, f1, iou, dist = map(lambda x: x.mean((0, 1)), (precision, recall, f1, iou, dist))
        return precision, recall, f1, iou, dist, confidence_thresholds
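The masked-mean idiom from the `*_out` loop, in isolation:

import torch

x = torch.tensor([0.0, 0.5, 0.0, 0.7])
print(x[x > 0.].mean())  # tensor(0.6000) -- mean over the non-zero entries only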
Example #14
    def train(self, train_loader, valid_loader,
              optimizer, scheduler, valid_score_fn,
              n_epochs, train_on_gpu=False, verbose=False, rst_path=None):
        """
        
        :param train_loader: 
        :param valid_loader: 
        :param optimizer: 
        :param scheduler: 
        :param valid_score_fn: 
        :param n_epochs: 
        :param train_on_gpu: 
        :param verbose: 
        :param rst_path: a string. 
            Path to the folder where the error and the best model should be stored. 
        :return: 
        """

        if train_on_gpu:
            self.model.cuda()

        train_loss_list, valid_loss_list, dice_score_list = [], [], []
        lr_rate_list = []
        valid_loss_min = np.inf
        for epoch in range(1, n_epochs + 1):
            # keep track of training and validation loss
            train_loss = 0.0
            valid_loss = 0.0
            dice_score = 0.0

            ###################
            # train the model #
            ###################
            self.model.train()

            bar = tq(train_loader, postfix={"train_loss": 0.0})
            for data, target in bar:
                # move tensors to GPU if CUDA is available
                if train_on_gpu:
                    data, target = data.cuda(), target.cuda()
                optimizer.zero_grad()
                # forward pass: compute predicted outputs by passing inputs to the model
                output = self.model(data)

                # calculate the batch loss
                loss = self.criterion(output, target)
                # backward pass: compute gradient of the loss with respect to model parameters
                loss.backward()
                # perform a single optimization step (parameter update)
                optimizer.step()

                train_loss += loss.item() * data.size(0)
                # print("Loss item: {}, data_size:{}".format(loss.item(), data.size(0)))
                bar.set_postfix(ordered_dict={"train_loss": loss.item()})

            ######################
            # validate the model #
            ######################
            self.model.eval()
            del data, target
            with torch.no_grad():
                bar = tq(valid_loader, postfix={"valid_loss": 0.0, "dice_score": 0.0})
                for data, target in bar:
                    # move tensors to GPU if CUDA is available
                    if train_on_gpu:
                        data, target = data.cuda(), target.cuda()

                    output = self.model(data)
                    loss = self.criterion(output, target)
                    # update average validation loss
                    valid_loss += loss.item() * data.size(0)
                    dice_cof = valid_score_fn(output.cpu(), target.cpu()).item()
                    dice_score += dice_cof * data.size(0)
                    bar.set_postfix(ordered_dict={"valid_loss": loss.item(), "dice_score": dice_cof})

            # calculate average losses
            train_loss = train_loss / len(train_loader.dataset)
            valid_loss = valid_loss / len(valid_loader.dataset)
            dice_score = dice_score / len(valid_loader.dataset)
            train_loss_list.append(train_loss)
            valid_loss_list.append(valid_loss)
            dice_score_list.append(dice_score)
            lr_rate_list.append([param_group['lr'] for param_group in optimizer.param_groups])

            # print training/validation statistics
            print('Epoch: {}  Training Loss: {:.6f}  Validation Loss: {:.6f} Dice Score: {:.6f}'.format(
                epoch, train_loss, valid_loss, dice_score))

            if rst_path is not None:
                with open(join(rst_path, 'training_rst.txt'), 'w') as frst:
                    frst.write(str(train_loss_list) + '\n')
                    frst.write(str(valid_loss_list) + '\n')
                    frst.write(str(dice_score_list) + '\n')

                # save model if validation loss has decreased
                if valid_loss <= valid_loss_min:
                    print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                        valid_loss_min,
                        valid_loss))
                    torch.save(self.model.state_dict(), join(rst_path, 'model_cifar.pt'))
                    valid_loss_min = valid_loss

            scheduler.step(valid_loss)

        return train_loss_list, valid_loss_list, dice_score_list, lr_rate_list
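`scheduler.step(valid_loss)` implies a metric-driven scheduler such as `torch.optim.lr_scheduler.ReduceLROnPlateau`; a hedged wiring sketch with a stand-in model and dummy losses:

import torch

model = torch.nn.Linear(4, 2)  # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2)
for valid_loss in [1.0, 0.9, 0.95, 0.96, 0.97]:  # dummy validation losses
    scheduler.step(valid_loss)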