Example #1
    def process_raw_path(self, data_path, label_path):
        y_offset = 0
        data_list = []
        cat_ys = []
        for cat_idx, cat in enumerate(self.categories):
            idx = self.category_ids[cat]
            point_paths = sorted(glob.glob(osp.join(data_path, idx, '*.pts')))
            y_paths = sorted(glob.glob(osp.join(label_path, idx, '*.seg')))

            points = [read_txt_array(path) for path in point_paths]
            ys = [read_txt_array(path, dtype=torch.long) for path in y_paths]
            lens = [y.size(0) for y in ys]

            y = torch.cat(ys).unique(return_inverse=True)[1] + y_offset
            cat_ys.append(y.unique())
            y_offset = y.max().item() + 1
            ys = y.split(lens)

            for (pos, y) in zip(points, ys):
                data = Data(y=y, pos=pos, category=cat_idx)
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)

        y_mask = torch.zeros((len(self.categories), y_offset),
                             dtype=torch.bool)
        for i, cat_y in enumerate(cat_ys):
            y_mask[i, cat_y] = True

        return data_list, y_mask
Example #2
    def process_raw_path(self, data_path, label_path):
        y_offset = 0
        data_list = []
        cat_ys = []
        for cat_idx, cat in enumerate(self.categories):
            idx = self.category_ids[cat]
            point_paths = sorted(glob.glob(osp.join(data_path, idx, '*.pts')))
            y_paths = sorted(glob.glob(osp.join(label_path, idx, '*.seg')))

            points = [read_txt_array(path) for path in point_paths]

            # ys = one 1-D label tensor per item, e.g. tensor([1, 2, 3, 2, 1])
            ys = [read_txt_array(path, dtype=torch.long) for path in y_paths]

            # Retrieve the lengths of each label tensor (i.e. number of labelled points)
            lens = [y.size(0) for y in ys]

            # torch.cat(ys): concatenate every per-item label tensor into one long 1-D tensor
            # .unique(return_inverse=True)[1]: remap the raw labels onto a contiguous range 0..N-1
            y = torch.cat(ys).unique(return_inverse=True)[1] + y_offset

            # Record the set of (offset) label ids used by this category
            cat_ys.append(y.unique())

            y_offset = y.max().item() + 1  # labels of the next category start here

            # Using the recorded per-item lengths, split the concatenated
            # label tensor back into one tensor per item
            ys = y.split(lens)

            # Create one Data object per item
            for (pos, y) in zip(points, ys):
                data = Data(y=y, pos=pos, category=cat_idx)
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)

        y_mask = torch.zeros((len(self.categories), y_offset),
                             dtype=torch.bool)
        for i, cat_y in enumerate(cat_ys):
            y_mask[i, cat_y] = True

        return data_list, y_mask
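A minimal sketch of the label-standardization trick used in Examples #1 and #2, on toy tensors rather than real ShapeNet labels: unique(return_inverse=True) remaps arbitrary raw labels onto a contiguous 0..N-1 range, and split(lens) restores the per-item structure afterwards.

    import torch

    ys = [torch.tensor([7, 7, 9]), torch.tensor([9, 3])]  # raw labels per item
    lens = [y.size(0) for y in ys]                         # [3, 2]

    # Concatenate, then remap {3, 7, 9} -> {0, 1, 2} (sorted order)
    y = torch.cat(ys).unique(return_inverse=True)[1]
    print(y)              # tensor([1, 1, 2, 2, 0])

    # Split back into per-item tensors using the recorded lengths
    print(y.split(lens))  # (tensor([1, 1, 2]), tensor([2, 0]))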
Example #3
    def _process_filenames(self, filenames):
        data_raw_list = []
        data_list = []
        categories_ids = [self.category_ids[cat] for cat in self.categories]
        cat_idx = {categories_ids[i]: i for i in range(len(categories_ids))}

        has_pre_transform = self.pre_transform is not None

        id_scan = -1
        for name in tq(filenames):
            cat = name.split(osp.sep)[0]
            if cat not in categories_ids:
                continue
            id_scan += 1
            data = read_txt_array(osp.join(self.raw_dir, name))
            pos = data[:, :3]
            x = data[:, 3:6]
            y = data[:, -1].type(torch.long)
            category = torch.ones(x.shape[0], dtype=torch.long) * cat_idx[cat]
            id_scan_tensor = torch.from_numpy(np.asarray([id_scan])).clone()
            data = Data(pos=pos,
                        x=x,
                        y=y,
                        category=category,
                        id_scan=id_scan_tensor)
            data = SaveOriginalPosId()(data)
            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            data_raw_list.append(data.clone() if has_pre_transform else data)
            if has_pre_transform:
                data = self.pre_transform(data)
                data_list.append(data)
        if not has_pre_transform:
            return [], data_raw_list
        return data_raw_list, data_list
Example #4
    def process_set(self, dataset):
        with open(
                osp.join(self.raw_dir,
                         "modelnet{}_shape_names.txt".format(self.name)),
                "r") as f:
            categories = f.read().splitlines()
            categories = sorted(categories)
        with open(
                osp.join(self.raw_dir,
                         "modelnet{}_{}.txt".format(self.name, dataset)),
                "r") as f:
            split_objects = f.read().splitlines()

        data_list = []
        for target, category in enumerate(categories):
            folder = osp.join(self.raw_dir, category)
            category_objects = filter(lambda o: category in o, split_objects)
            paths = [
                "{}/{}.txt".format(folder, o.strip()) for o in category_objects
            ]
            for path in paths:
                raw = read_txt_array(path, sep=",")
                data = Data(pos=raw[:, :3],
                            norm=raw[:, 3:],
                            y=torch.tensor([target]))
                data_list.append(data)

        if self.pre_filter is not None:
            data_list = [d for d in data_list if self.pre_filter(d)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(d) for d in data_list]

        return self.collate(data_list)
Example #5
    def process_set(self, dataset):
        path = osp.join(self.raw_dir, f'modelnet{self.name}_shape_names.txt')
        with open(path, 'r') as f:
            categories = f.read().split('\n')[:-1]
            cate_id = {cate: i for i, cate in enumerate(categories)}

        path = osp.join(self.raw_dir, f'modelnet{self.name}_{dataset}.txt')
        with open(path, 'r') as f:
            file_list = f.read().split('\n')[:-1]

        data_list = []
        with tqdm(file_list) as t:
            for file_name in t:
                category = '_'.join(file_name.split('_')[:-1])
                path = osp.join(self.raw_dir, category, f'{file_name}.txt')
                data = read_txt_array(path, sep=',')
                data = Data(pos=data[:, :3], norm=data[:, 3:])
                data.y = torch.tensor([cate_id[category]])
                data_list.append(data)

        if self.pre_filter is not None:
            data_list = [d for d in data_list if self.pre_filter(d)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(d) for d in data_list]

        return self.collate(data_list)
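The '_'.join(file_name.split('_')[:-1]) idiom above is needed because ModelNet category names can themselves contain underscores; splitting on the first underscore alone would truncate them. A quick check with invented file names:

    for file_name in ['chair_0001', 'night_stand_0042']:
        category = '_'.join(file_name.split('_')[:-1])
        print(category)  # 'chair', then 'night_stand'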
Example #6
    def process_graph(self, triple_path, feature_path):
        g1 = read_txt_array(triple_path, sep='\t', dtype=torch.long)
        subj, rel, obj = g1.t()
        name_dict = {}
        with open(feature_path, 'r') as f:
            for line in f:
                info = line.strip().split('\t')
                info = info if len(info) == 2 else info + ['']
                seq_str = remove_punc(info[1]).strip()
                if seq_str == "":
                    seq_str = '<unk>'
                name_dict[int(info[0])] = seq_str

        idx = torch.tensor(list(name_dict.keys()))
        assoc = torch.full((idx.max().item() + 1,), -1, dtype=torch.long)
        assoc[idx] = torch.arange(idx.size(0))

        subj, obj = assoc[subj], assoc[obj]
        edge_index = torch.stack([subj, obj], dim=0)
        edge_index, rel = sort_edge_index(edge_index, rel)

        names = [None for _ in range(idx.size(0))]
        for i in name_dict.keys():
            names[assoc[i]] = name_dict[i]

        return edge_index, rel, assoc, names
Example #7
    def process_events(self) -> torch.Tensor:
        events = []
        for path in self.raw_paths:
            # read only the first four tab-separated columns
            data = read_txt_array(path, sep='\t', end=4, dtype=torch.long)
            data[:, 3] = data[:, 3] // 15  # discretize the time column into 15-unit buckets
            events += [data]
        return torch.cat(events, dim=0)
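Here end=4 keeps only the first four tab-separated columns, and the integer division coarsens the fourth column (presumably a timestamp) into 15-unit buckets. A toy illustration of the bucketing:

    import torch

    ts = torch.tensor([0, 14, 15, 29, 30])
    print(ts // 15)  # tensor([0, 0, 1, 1, 2]) -- three 15-unit buckets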
Example #8
    def process_graph(
        self,
        triple_path: str,
        feature_path: str,
        embeddings: Dict[str, Tensor],
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:

        g1 = read_txt_array(triple_path, sep='\t', dtype=torch.long)
        subj, rel, obj = g1.t()

        x_dict = {}
        with open(feature_path, 'r') as f:
            for line in f:
                info = line.strip().split('\t')
                info = info if len(info) == 2 else info + ['**UNK**']
                seq = info[1].lower().split()
                hs = [embeddings.get(w, embeddings['**UNK**']) for w in seq]
                x_dict[int(info[0])] = torch.stack(hs, dim=0)

        idx = torch.tensor(list(x_dict.keys()))
        assoc = torch.full((idx.max().item() + 1, ), -1, dtype=torch.long)
        assoc[idx] = torch.arange(idx.size(0))

        subj, obj = assoc[subj], assoc[obj]
        edge_index = torch.stack([subj, obj], dim=0)
        edge_index, rel = sort_edge_index(edge_index, rel)

        xs = [None for _ in range(idx.size(0))]
        for i in x_dict.keys():
            xs[assoc[i]] = x_dict[i]
        x = torch.nn.utils.rnn.pad_sequence(xs, batch_first=True)

        return x, edge_index, rel, assoc
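The assoc construction in Examples #6 and #8 builds a dense lookup table from sparse original node ids to contiguous indices, with -1 marking ids that never occur. A minimal sketch with made-up ids:

    import torch

    idx = torch.tensor([2, 5, 9])  # original (sparse) node ids
    assoc = torch.full((idx.max().item() + 1, ), -1, dtype=torch.long)
    assoc[idx] = torch.arange(idx.size(0))  # 2 -> 0, 5 -> 1, 9 -> 2
    print(assoc)  # tensor([-1, -1,  0, -1, -1,  1, -1, -1, -1,  2])

    subj = torch.tensor([9, 2])
    print(assoc[subj])  # tensor([2, 0]) -- edge endpoints remapped in one shot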
Example #9
    def process_filenames(self, filenames):
        data_list = []
        categories_ids = [self.category_ids[cat] for cat in self.categories]
        categories_indices = [
            list(self.category_ids).index(key) for key in self.categories
        ]
        cat_idx = {
            categories_ids[i]: v
            for i, v in enumerate(categories_indices)
        }
        for name in filenames:
            cat = name.split(osp.sep)[0]
            if cat not in categories_ids:
                continue

            data = read_txt_array(osp.join(self.raw_dir, name))
            pos = data[:, :3]
            x = data[:, 3:6]
            y = data[:, -1].type(torch.long)
            data = Data(pos=pos, x=x, y=y, category=cat_idx[cat])
            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)

        return data_list
Example #10
    def _read_file(self, filename):
        raw = read_txt_array(filename)
        pos = raw[:, :3]
        x = raw[:, 3:6]
        if raw.shape[1] == 7:
            # a seventh column, when present, holds per-point labels
            y = raw[:, 6].type(torch.long)
        else:
            y = None
        return Data(pos=pos, x=x, y=y)
Example #11
    def process_y_n2id(self, link_path, n2i_curr, n2i_dataset):
        curr_reverse = self.pair[:2] == "en"
        assoc0, assoc1 = (self.get_assoc(n2i_curr[i], n2i_dataset[i])
                          for i in range(2))
        g1, g2 = read_txt_array(link_path, sep="\t", dtype=torch.long).t()
        if curr_reverse:
            g1, g2 = g2, g1
        g1 = assoc0[g1]
        g2 = assoc1[g2]
        return torch.stack([g1, g2], dim=0).to(self.device)
Example #12
    def process(self):
        data_list = []
        for cat in self.categories:
            paths = glob.glob(osp.join(self.raw_dir, f'{cat}*.tri'))
            paths = [path[:-4] for path in paths]
            paths = sorted(paths, key=lambda e: (len(e), e))

            for path in paths:
                pos = read_txt_array(f'{path}.vert')
                face = read_txt_array(f'{path}.tri', dtype=torch.long)
                data = Data(pos=pos, face=face.t().contiguous())
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)

        torch.save(self.collate(data_list), self.processed_paths[0])
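The sort key (len(e), e) above yields a natural numeric ordering for unpadded file names, which plain lexicographic sorting would scramble:

    paths = ['cat10', 'cat2', 'cat1']
    print(sorted(paths))                             # ['cat1', 'cat10', 'cat2']
    print(sorted(paths, key=lambda e: (len(e), e)))  # ['cat1', 'cat2', 'cat10']

Note this relies on the names sharing a common prefix, which holds within each {cat}*.tri glob used here.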
Example #13
    def process(self):

        edge_path = osp.join(self.raw_dir, '{}_edgelist.txt'.format(self.name))
        edge_index = read_txt_array(edge_path, sep=',', dtype=torch.long).t()

        docs_path = osp.join(self.raw_dir, '{}_docs.txt'.format(self.name))
        content_list = []
        with open(docs_path, 'rb') as f:
            for line in f:
                content_list.append(line.decode('utf-8').split(','))
        x = np.array(content_list, dtype=float)
        x = torch.from_numpy(x).to(torch.float)

        label_path = osp.join(self.raw_dir, '{}_labels.txt'.format(self.name))
        content_list = []
        with open(label_path, 'rb') as f:
            for line in f:
                content_list.append(line.decode('utf-8').strip())
        y = np.array(content_list, dtype=int)
        y = torch.from_numpy(y).to(torch.int64)

        data = Data(edge_index=edge_index, x=x, y=y)

        random_node_indices = np.random.permutation(y.shape[0])
        training_size = int(len(random_node_indices) * 0.7)
        val_size = int(len(random_node_indices) * 0.1)
        train_node_indices = random_node_indices[:training_size]
        val_node_indices = random_node_indices[training_size:training_size + val_size]
        test_node_indices = random_node_indices[training_size + val_size:]

        train_mask = torch.zeros(y.shape[0], dtype=torch.bool)
        train_mask[train_node_indices] = True
        val_mask = torch.zeros(y.shape[0], dtype=torch.bool)
        val_mask[val_node_indices] = True
        test_mask = torch.zeros(y.shape[0], dtype=torch.bool)
        test_mask[test_node_indices] = True

        data.train_mask = train_mask
        data.val_mask = val_mask
        data.test_mask = test_mask

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        data, slices = self.collate([data])

        torch.save((data, slices), self.processed_paths[0])
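Note that np.random.permutation above is unseeded, so the 70/10/20 split changes on every run. A seeded variant (illustrative only; the seed value is made up) makes it reproducible:

    import numpy as np
    import torch

    num_nodes, seed = 10, 0  # toy size and hypothetical seed
    perm = np.random.default_rng(seed).permutation(num_nodes)
    n_train = int(num_nodes * 0.7)

    train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    train_mask[perm[:n_train]] = True
    print(train_mask.sum().item())  # 7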
Example #14
    def process(self):
        with open(self.raw_paths[0], "r") as f:
            filenames = f.read().split('\n')[:-1]
        data_list = []
        for filename in filenames:
            pos_path = osp.join(self.raw_dir, filename+'.xyz')
            normal_path = osp.join(self.raw_dir, filename+'.normals')
            curv_path = osp.join(self.raw_dir, filename+'.curv')
            idx_path = osp.join(self.raw_dir, filename+'.pidx')
            pos = read_txt_array(pos_path)
            normals = read_txt_array(normal_path)
            curv = read_txt_array(curv_path)
            normals_and_curv = torch.cat([normals, curv], dim=1)
            pidx = read_txt_array(idx_path, dtype=torch.long)
            data = Data(pos=pos, x=normals_and_curv, y=pidx)
            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)

        torch.save(self.collate(data_list), self.processed_paths[0])
Example #15
def read_file(folder, prefix, name):
    path = osp.join(folder, "ind.{}.{}".format(prefix.lower(), name))

    if name == "test.index":
        return read_txt_array(path, dtype=torch.long)

    with open(path, "rb") as f:
        if sys.version_info > (3, 0):
            out = pickle.load(f, encoding="latin1")
        else:
            out = pickle.load(f)

    if name == "graph" or name == "y.index":
        return out

    out = out.todense() if hasattr(out, "todense") else out
    out = torch.Tensor(out)
    return out
Example #16
def read_file(folder, prefix, name):
    path = osp.join(folder, 'ind.{}.{}'.format(prefix.lower(), name))

    if name == 'test.index':
        return read_txt_array(path, dtype=torch.long)

    with open(path, 'rb') as f:
        if sys.version_info > (3, 0):
            out = pickle.load(f, encoding='latin1')
        else:
            out = pickle.load(f)

    if name == 'graph':
        return out

    out = out.todense() if hasattr(out, 'todense') else out
    out = torch.Tensor(out)
    return out
Example #17
def read_file(folder, prefix, name):
    path = osp.join(folder, f'ind.{prefix.lower()}.{name}')

    if name == 'test.index':
        return read_txt_array(path, dtype=torch.long)

    with open(path, 'rb') as f:
        if sys.version_info > (3, 0):
            warnings.filterwarnings('ignore', '.*`scipy.sparse.csr` name.*')
            out = pickle.load(f, encoding='latin1')
        else:
            out = pickle.load(f)

    if name == 'graph':
        return out

    out = out.todense() if hasattr(out, 'todense') else out
    out = torch.Tensor(out)
    return out
Example #18
    def process(self):
        x = sp.load_npz(
            osp.join(self.raw_dir, f'new_{self.feature}_feature.npz'))
        x = torch.from_numpy(x.todense()).to(torch.float)

        edge_index = read_txt_array(osp.join(self.raw_dir, 'A.txt'),
                                    sep=',',
                                    dtype=torch.long).t()
        edge_index, _ = coalesce(edge_index, None, x.size(0), x.size(0))

        y = np.load(osp.join(self.raw_dir, 'graph_labels.npy'))
        y = torch.from_numpy(y).to(torch.long)
        _, y = y.unique(sorted=True, return_inverse=True)

        batch = np.load(osp.join(self.raw_dir, 'node_graph_id.npy'))
        batch = torch.from_numpy(batch).to(torch.long)

        node_slice = torch.cumsum(batch.bincount(), 0)
        node_slice = torch.cat([torch.tensor([0]), node_slice])
        edge_slice = torch.cumsum(batch[edge_index[0]].bincount(), 0)
        edge_slice = torch.cat([torch.tensor([0]), edge_slice])
        graph_slice = torch.arange(y.size(0) + 1)
        self.slices = {
            'x': node_slice,
            'edge_index': edge_slice,
            'y': graph_slice
        }

        edge_index -= node_slice[batch[edge_index[0]]].view(1, -1)
        self.data = Data(x=x, edge_index=edge_index, y=y)

        for path, split in zip(self.processed_paths, ['train', 'val', 'test']):
            idx = np.load(osp.join(self.raw_dir, f'{split}_idx.npy')).tolist()
            data_list = [self.get(i) for i in idx]
            if self.pre_filter is not None:
                data_list = [d for d in data_list if self.pre_filter(d)]
            if self.pre_transform is not None:
                data_list = [self.pre_transform(d) for d in data_list]
            torch.save(self.collate(data_list), path)
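A toy walkthrough of the slice bookkeeping above, assuming two graphs with 2 and 3 nodes (batch assigns each node to its graph):

    import torch

    batch = torch.tensor([0, 0, 1, 1, 1])           # node -> graph assignment
    node_slice = torch.cumsum(batch.bincount(), 0)  # tensor([2, 5])
    node_slice = torch.cat([torch.tensor([0]), node_slice])
    print(node_slice)  # tensor([0, 2, 5]): graph i owns nodes [slice[i], slice[i+1])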
Example #19
    def process(self):
        ref_data = read_off(
            osp.join(self.raw_paths[0], 'null', f'{self.cat}.off'))

        train_list = []
        name = f'{self.part}_{self.cat}_*.off'
        paths = glob.glob(osp.join(self.raw_paths[0], self.part, name))
        paths = [path[:-4] for path in paths]
        paths = sorted(paths, key=lambda e: (len(e), e))

        for path in paths:
            data = read_off(f'{path}.off')
            y = read_txt_array(f'{path}.baryc_gt')
            data.y = y[:, 0].to(torch.long) - 1
            data.y_baryc = y[:, 1:]
            train_list.append(data)

        test_list = []
        name = f'{self.part}_{self.cat}_*.off'
        paths = glob.glob(osp.join(self.raw_paths[1], self.part, name))
        paths = [path[:-4] for path in paths]
        paths = sorted(paths, key=lambda e: (len(e), e))

        for path in paths:
            test_list.append(read_off(f'{path}.off'))

        if self.pre_filter is not None:
            train_list = [d for d in train_list if self.pre_filter(d)]
            test_list = [d for d in test_list if self.pre_filter(d)]

        if self.pre_transform is not None:
            ref_data = self.pre_transform(ref_data)
            train_list = [self.pre_transform(d) for d in train_list]
            test_list = [self.pre_transform(d) for d in test_list]

        torch.save(ref_data, self.processed_paths[0])
        torch.save(self.collate(train_list), self.processed_paths[1])
        torch.save(self.collate(test_list), self.processed_paths[2])
Example #20
    def process_filenames(self, filenames):
        data_list = []
        categories_ids = [self.category_ids[cat] for cat in self.categories]
        cat_idx = {categories_ids[i]: i for i in range(len(categories_ids))}

        for name in tq(filenames):
            cat = name.split(osp.sep)[0]
            if cat not in categories_ids:
                continue

            data = read_txt_array(osp.join(self.raw_dir, name))
            pos = data[:, :3]
            x = data[:, 3:6]
            y = data[:, -1].type(torch.long)
            category = torch.ones(x.shape[0], dtype=torch.long) * cat_idx[cat]
            data = Data(pos=pos, x=x, y=y, category=category)
            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)

        return data_list
Example #21
def read_file(folder, name, dtype=None):
    path = osp.join(folder, '{}.txt'.format(name))
    return read_txt_array(path, sep=',', dtype=dtype)
Example #22
def read_file(folder, prefix, name, dtype=None):
    path = osp.join(folder, f'{prefix}_{name}.txt')
    return read_txt_array(path, sep=',', dtype=dtype)
Example #23
def read_file(folder, prefix, name, dtype=None):
    path = osp.join(folder, "{}_{}.txt".format(prefix, name))
    return read_txt_array(path, sep=",", dtype=dtype)
Example #24
    def process_y(self, path: str, assoc1: Tensor, assoc2: Tensor) -> Tensor:
        row, col, mask = read_txt_array(path, sep='\t', dtype=torch.long).t()
        mask = mask.to(torch.bool)
        return torch.stack([assoc1[row[mask]], assoc2[col[mask]]], dim=0)
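The last example keeps only the alignment pairs whose third column is set before remapping them through the assoc tables. A minimal sketch with toy tensors standing in for the file contents:

    import torch

    row = torch.tensor([0, 1, 2])                 # as produced by read_txt_array(...).t()
    col = torch.tensor([5, 6, 7])
    mask = torch.tensor([1, 0, 1]).to(torch.bool)

    assoc1 = torch.arange(10) * 10                # stand-ins for the real assoc tables
    assoc2 = torch.arange(10) + 100

    print(torch.stack([assoc1[row[mask]], assoc2[col[mask]]], dim=0))
    # tensor([[  0,  20],
    #         [105, 107]])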