Example #1
    def process(self):
        idx = self.categories[self.category]
        paths = [osp.join(path, idx) for path in self.raw_paths]
        datasets = []
        # raw_paths alternate between data and label directories per split.
        for pos_dir, y_dir in zip(paths[::2], paths[1::2]):
            pos_paths = sorted(glob.glob(osp.join(pos_dir, '*.pts')))
            y_paths = sorted(glob.glob(osp.join(y_dir, '*.seg')))
            data_list = []
            for pos_path, y_path in zip(pos_paths, y_paths):
                pos = read_txt_array(pos_path)
                y = read_txt_array(y_path, dtype=torch.long)
                data = Data(y=y, pos=pos)
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)
            datasets.append(data_list)

        makedirs(osp.join(self.processed_dir, self.category))
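        # Merge the first two raw splits for training; the last split is the
        # test set.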
        train_data, train_slices = self.collate(datasets[0] + datasets[1])
        test_data, test_slices = self.collate(datasets[2])

        _, train_data.y = train_data.y.unique(return_inverse=True)
        _, test_data.y = test_data.y.unique(return_inverse=True)

        torch.save((train_data, train_slices), self.processed_paths[0])
        torch.save((test_data, test_slices), self.processed_paths[1])
Example #2
    def process_raw_path(self, data_path, label_path):
        y_offset = 0
        data_list = []
        cat_ys = []
        for cat_idx, cat in enumerate(self.categories):
            idx = self.category_ids[cat]
            point_paths = sorted(glob.glob(osp.join(data_path, idx, '*.pts')))
            y_paths = sorted(glob.glob(osp.join(label_path, idx, '*.seg')))

            points = [read_txt_array(path) for path in point_paths]
            ys = [read_txt_array(path, dtype=torch.long) for path in y_paths]
            lens = [y.size(0) for y in ys]

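            # Shift this category's labels into a contiguous range starting
            # at y_offset.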
            y = torch.cat(ys).unique(return_inverse=True)[1] + y_offset
            cat_ys.append(y.unique())
            y_offset = y.max().item() + 1
            ys = y.split(lens)

            for (pos, y) in zip(points, ys):
                data = Data(y=y, pos=pos, category=cat_idx)
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)

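        # Binary mask marking which global part labels belong to each category.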
        y_mask = torch.zeros((len(self.categories), y_offset),
                             dtype=torch.uint8)
        for i, cat_y in enumerate(cat_ys):
            y_mask[i, cat_y] = 1

        return data_list, y_mask
Example #3
    def process(self):
        data_splits = [[], [], []]  # Corresponds to train/val/test
        cat_names = list(self.categories.keys())
        category_infos = {}
        cat_sid = 0
        for cid, cat in enumerate(cat_names):
            infos = {'category_id': cid}
            paths = [osp.join(path, self.categories[cat])
                     for path in self.raw_paths]
            print('Processing category {}'.format(cat))

            # Loop over train/val/test split
            ymax = 0  # Largest label value of this category
            sample_nums = []
            for split, (pos_dir, y_dir) in enumerate(
                    zip(paths[::2], paths[1::2])):
                pos_paths = sorted(glob.glob(osp.join(pos_dir, '*.pts')))
                y_paths = sorted(glob.glob(osp.join(y_dir, '*.seg')))

                # Gather all category split samples
                data_list = []
                for pos_path, y_path in zip(pos_paths, y_paths):
                    pos = read_txt_array(pos_path)
                    y = read_txt_array(y_path, dtype=torch.long)
                    y = self.reindex_labels(y, cat_sid)
                    ymax = max(ymax, int(y.max()))
                    data = Data(y=y, pos=pos, cid=cid)

                    # Data preprocessing
                    if self.pre_filter is not None and not self.pre_filter(data):
                        continue
                    if self.pre_transform is not None:
                        data = self.pre_transform(data)

                    data_list.append(data)
                sample_nums.append(len(data_list))
                data_splits[split] += data_list  # Merge split data across categories
            cat_eid = ymax  # Largest part label used by this category.
            infos['part_id_min'] = cat_sid
            infos['part_id_max'] = cat_eid
            infos['part_num'] = cat_eid - cat_sid + 1
            infos['sample_num'] = sample_nums
            category_infos[cat] = infos
            print('> Category: {} Infos: {}'.format(cat, infos))
            cat_sid = cat_eid + 1  # Update category start index

        train_data, train_slices = self.collate(data_splits[0])
        val_data, val_slices = self.collate(data_splits[1])
        test_data, test_slices = self.collate(data_splits[2])

        # Save processed data
        torch.save((train_data, train_slices), self.processed_paths[0])
        torch.save((val_data, val_slices), self.processed_paths[1])
        torch.save((test_data, test_slices), self.processed_paths[2])
        torch.save(category_infos, osp.join(self.processed_dir, 'all_infos.pt'))
Example #4
    def process(self):
        data_list = []
        for cat in self.categories:
            paths = glob.glob(osp.join(self.raw_dir, '{}*.tri'.format(cat)))
            paths = [path[:-4] for path in paths]
            paths = sorted(paths, key=lambda e: (len(e), e))

            for path in paths:
                pos = read_txt_array('{}.vert'.format(path))
                face = read_txt_array('{}.tri'.format(path), dtype=torch.long)

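                # .tri faces are 1-indexed; shift to 0-indexed and store as
                # shape [3, num_faces].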
                face = face.t().contiguous() - 1

                # Randomly sample `self.num` faces.
                random_list = sorted(
                    random.sample(range(face.size(1)), self.num))
                face = face[:, random_list]

                # Keep only the vertices referenced by the sampled faces and
                # re-index the face entries into the compacted vertex list.
                vertex_idx, face = face.unique(return_inverse=True)
                pos = pos[vertex_idx]

                assert pos.size(1) == 3 and face.size(0) == 3

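                # Each triangle contributes edges (a, b), (b, c) and (a, c).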
                edge_index = torch.cat([face[:2], face[1:], face[::2]], dim=1)
                edge_index = to_undirected(edge_index, num_nodes=pos.size(0))

                data = Data(pos=pos,
                            face=face,
                            edge_index=edge_index,
                            y=self.categories.index(cat))

                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)

                data_list.append(data)

        torch.save(self.collate(data_list), self.processed_paths[0])
Example #5
File: dbp15k.py (Project: zhihy96/Multi-GNN)
    def process_graph(self, triple_path, feature_path, embeddings):
        g1 = read_txt_array(triple_path, sep='\t', dtype=torch.long)
        subj, rel, obj = g1.t()

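        # Build each entity's sequence of word embeddings from the feature file.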
        x_dict = {}
        with open(feature_path, 'r') as f:
            for line in f:
                info = line.strip().split('\t')
                info = info if len(info) == 2 else info + ['**UNK**']
                seq = info[1].lower().split()
                hs = [embeddings.get(w, embeddings['**UNK**']) for w in seq]
                x_dict[int(info[0])] = torch.stack(hs, dim=0)

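        # assoc maps raw entity ids to consecutive node indices.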
        idx = torch.tensor(list(x_dict.keys()))
        assoc = torch.full((idx.max().item() + 1, ), -1, dtype=torch.long)
        assoc[idx] = torch.arange(idx.size(0))

        subj, obj = assoc[subj], assoc[obj]
        edge_index = torch.stack([subj, obj], dim=0)
        edge_index, rel = sort_edge_index(edge_index, rel)

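        # Pad the variable-length embedding sequences into one dense tensor.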
        xs = [None for _ in range(idx.size(0))]
        for i in x_dict.keys():
            xs[assoc[i]] = x_dict[i]
        x = torch.nn.utils.rnn.pad_sequence(xs, batch_first=True)

        return x, edge_index, rel, assoc
Example #6
    def process(self):
        data_list = []
        for off_path in glob.glob('{}/*.off'.format(self.raw_paths[0])):
            data = read_off(off_path)
            shape_id = osp.basename(off_path).rsplit('.', 1)[0]
            label_path = osp.join(self.raw_paths[1], shape_id + '.seg')
            data.y = read_txt_array(label_path, dtype=torch.long) - 1  # Labels start from 0.
            if self.classification is not None:
                if self.classification in data.y:
                    data.y = torch.tensor([1])
                else:
                    data.y = torch.tensor([0])

            data.shape_id = torch.tensor([int(shape_id)])
            data_list.append(data)

        if self.pre_filter is not None:
            data_list = [d for d in data_list if self.pre_filter(d)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(d) for d in data_list]

        # Hold out the last 15% of samples as a validation split.
        val_split = 0.15
        val_size = int(len(data_list) * val_split)
        train_size = len(data_list) - val_size

        torch.save(self.collate(data_list[:train_size]), self.processed_paths[0])
        torch.save(self.collate(data_list[train_size:]), self.processed_paths[1])
Example #7
    def process_events(self):
        events = []
        for path in self.raw_paths:
            data = read_txt_array(path, sep='\t', end=4, dtype=torch.long)
            data[:, 3] = data[:, 3] // 24  # Coarsen timestamps by a factor of 24.
            events += [data]
        return torch.cat(events, dim=0)
Example #8
    def process(self):
        data_list = []
        splits = [0]
        for raw_path in self.raw_paths:
            srot = read_txt_array(raw_path, sep='\t', end=4, dtype=torch.long)
            row, rel, col, time = srot.t().contiguous()
            time = time // self.granularity  # Coarsen timestamps to snapshot granularity.

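            # Split edges into per-timestamp chunks using the event counts.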
            count = time.bincount()
            split_sections = count[count > 0].tolist()

            rows = row.split(split_sections)
            cols = col.split(split_sections)
            rels = rel.split(split_sections)
            times = time.split(split_sections)
            splits.append(splits[-1] + len(rows))

            for row, col, rel, time in zip(rows, cols, rels, times):
                edge_index = torch.stack([row, col], dim=0)
                data = Data(edge_index=edge_index, edge_type=rel, time=time)
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)

        torch.save(self.collate(data_list), self.processed_paths[0])
        torch.save(splits, self.processed_paths[1])
Example #9
def read_file(folder, prefix, name):
    path = osp.join(folder, 'ind.{}.{}'.format(prefix.lower(), name))

    if name == 'test.index':
        return read_txt_array(path, dtype=torch.long)

    with open(path, 'rb') as f:
        if sys.version_info > (3, 0):
            out = pickle.load(f, encoding='latin1')
        else:
            out = pickle.load(f)

    if name == 'graph':
        return out

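    # Densify scipy sparse matrices before converting to tensors.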
    out = out.todense() if hasattr(out, 'todense') else out
    print('Input x contains inf / nan:', np.isinf(out).any(), np.isnan(out).any())

    # For faster training, discard the one-hot encoding and use
    # 32-dimensional vectors from a Gaussian random projection.
    if prefix == 'ddi_constraint' or prefix == 'decagon':
        if name == 'allx':
            transformer = random_projection.GaussianRandomProjection(
                n_components=32)
            out = transformer.fit_transform(out)
    out = torch.FloatTensor(out)
    return out
Example #10
    def process(self):
        data_list = []
        for cat in self.categories:
            paths = glob.glob(osp.join(self.raw_dir, '{}*.tri'.format(cat)))
            paths = [path[:-4] for path in paths]
            paths = sorted(paths, key=lambda e: (len(e), e))

            for path in paths:
                pos = read_txt_array('{}.vert'.format(path))
                face = read_txt_array('{}.tri'.format(path), dtype=torch.long)
                data = Data(pos=pos, face=face.t().contiguous())
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)

        torch.save(self.collate(data_list), self.processed_paths[0])
Example #11
    def process(self):
        idx = self.categories[self.category]
        paths = [osp.join(path, idx) for path in self.raw_paths]
        datasets = []
        for pos_dir, y_dir in zip(paths[::2], paths[1::2]):
            pos_paths = sorted(glob.glob(osp.join(pos_dir, '*.pts')))
            y_paths = sorted(glob.glob(osp.join(y_dir, '*.seg')))
            data_list = []
            for pos_path, y_path in zip(pos_paths, y_paths):
                pos = read_txt_array(pos_path)
                y = read_txt_array(y_path, dtype=torch.long)
                data = Data(y=y, pos=pos)
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)
            datasets.append(data_list)

        makedirs(osp.join(self.processed_dir, self.category))
        data_list = datasets[0] + datasets[1]
        torch.save(self.collate(data_list), self.processed_paths[0])
        torch.save(self.collate(datasets[2]), self.processed_paths[1])
Example #12
    def process_raw_path(self, data_path, label_path):
        y_offset = 0
        data_list = []
        for category in self.categories:
            idx = self.category_ids[category]
            point_paths = sorted(glob.glob(osp.join(data_path, idx, '*.pts')))
            y_paths = sorted(glob.glob(osp.join(label_path, idx, '*.seg')))

            points = [read_txt_array(path) for path in point_paths]
            ys = [read_txt_array(path, dtype=torch.long) for path in y_paths]
            lens = [y.size(0) for y in ys]

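            # Map this category's labels to a contiguous range offset by
            # y_offset.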
            y = torch.cat(ys).unique(return_inverse=True)[1] + y_offset
            y_offset = y.max().item() + 1
            ys = y.split(lens)

            for (pos, y) in zip(points, ys):
                data = Data(y=y, pos=pos)
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)
        return data_list
Example #13
    def process(self):
        with open(self.raw_paths[0], 'r') as f:
            filenames = f.read().split('\n')[:-1]
        data_list = []
        for filename in filenames:
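            # Each sample stores points, normals, curvatures and test indices
            # in separate files sharing one basename.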
            pos_path = osp.join(self.raw_dir, filename + '.xyz')
            normal_path = osp.join(self.raw_dir, filename + '.normals')
            curv_path = osp.join(self.raw_dir, filename + '.curv')
            idx_path = osp.join(self.raw_dir, filename + '.pidx')
            pos = read_txt_array(pos_path)
            normals = read_txt_array(normal_path)
            curv = read_txt_array(curv_path)
            normals_and_curv = torch.cat([normals, curv], dim=1)
            test_idx = read_txt_array(idx_path, dtype=torch.long)
            data = Data(pos=pos, x=normals_and_curv)
            data.test_idx = test_idx
            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)

        torch.save(self.collate(data_list), self.processed_paths[0])
Example #14
def read_file(folder, prefix, name):
    path = osp.join(folder, 'ind.{}.{}'.format(prefix.lower(), name))

    if name == 'test.index':
        return read_txt_array(path, dtype=torch.long)

    with open(path, 'rb') as f:
        if sys.version_info > (3, 0):
            out = pickle.load(f, encoding='latin1')
        else:
            out = pickle.load(f)

    if name == 'graph':
        return out

    out = out.todense() if hasattr(out, 'todense') else out
    out = torch.Tensor(out)
    return out
Example #15
    def process(self):
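        # Load the reference shape of this category from the 'null' folder.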
        ref_data = read_off(
            osp.join(self.raw_paths[0], 'null', '{}.off'.format(self.cat)))

        train_list = []
        name = '{}_{}_*.off'.format(self.part, self.cat)
        paths = glob.glob(osp.join(self.raw_paths[0], self.part, name))
        paths = [path[:-4] for path in paths]
        paths = sorted(paths, key=lambda e: (len(e), e))

        for path in paths:
            data = read_off('{}.off'.format(path))
            y = read_txt_array('{}.baryc_gt'.format(path))
            data.y = y[:, 0].to(torch.long) - 1
            data.y_baryc = y[:, 1:]
            train_list.append(data)

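        # Test meshes are loaded without ground-truth labels.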
        test_list = []
        name = '{}_{}_*.off'.format(self.part, self.cat)
        paths = glob.glob(osp.join(self.raw_paths[1], self.part, name))
        paths = [path[:-4] for path in paths]
        paths = sorted(paths, key=lambda e: (len(e), e))

        for path in paths:
            test_list.append(read_off('{}.off'.format(path)))

        if self.pre_filter is not None:
            train_list = [d for d in train_list if self.pre_filter(d)]
            test_list = [d for d in test_list if self.pre_filter(d)]

        if self.pre_transform is not None:
            ref_data = self.pre_transform(ref_data)
            train_list = [self.pre_transform(d) for d in train_list]
            test_list = [self.pre_transform(d) for d in test_list]

        torch.save(ref_data, self.processed_paths[0])
        torch.save(self.collate(train_list), self.processed_paths[1])
        torch.save(self.collate(test_list), self.processed_paths[2])
Example #16
def read_file(folder, prefix, name, dtype=None):
    path = osp.join(folder, '{}_{}.txt'.format(prefix, name))
    return read_txt_array(path, sep=',', dtype=dtype)
Example #17
File: dbp15k.py (Project: zhihy96/Multi-GNN)
    def process_y(self, path, assoc1, assoc2):
        row, col, mask = read_txt_array(path, sep='\t', dtype=torch.long).t()
        mask = mask.to(torch.bool)  # Use the third column as a boolean selection mask.
        return torch.stack([assoc1[row[mask]], assoc2[col[mask]]], dim=0)