Beispiel #1
0
    def gen(self, ham, itr=1000, nbasis=1000, burn=500, thin=1):
        """Sample measurement outcomes of the ground state of *ham* in
        randomly chosen local bases.

        Args:
            ham: Hamiltonian; presumably exposes ``lattice`` and ``size``
                (TODO confirm against ``ground``/``STMetropolis``).
            itr: Metropolis iterations per basis.
            nbasis: number of random measurement bases to draw.
            burn: burn-in samples discarded by the sampler.
            thin: thinning interval of the sampler.

        Returns:
            tuple: (data, basis) — samples and the basis parameters that
            produced each of them.
        """
        _, state = ground(ham)
        n = ham.lattice.numel()

        # Create the output directory; an already-existing one is fine.
        try:
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        data, basis = [], []
        for _ in range(nbasis):
            # One random Pauli rotation (x/y/z) per site.
            ops = MBOp(choice([sigmax, sigmay, sigmaz]) for _ in range(n))
            rotated = ops.invtrans(state)
            sampler = STMetropolis(
                proposal=lambda x: abs(rotated[bin(x)]) ** 2,
                size=ham.size)
            data.extend(sampler.sample(itr=itr, burn=burn, thin=thin))
            # Repeat the basis parameters once per collected sample.
            basis.extend(
                torch.FloatTensor(ops.params()).resize_(2 * n)
                for _ in range(len(sampler.collector)))
        return data, basis
def print_instances_class_histogram(dataset_dicts, class_names):
    """Log a histogram of instance counts per class.

    Args:
        dataset_dicts (list[dict]): list of dataset dicts; each carries an
            "annotations" list of {"category_id": int, "iscrowd": 0/1}.
        class_names (list[str]): list of class names (zero-indexed).
    """
    num_classes = len(class_names)
    hist_bins = np.arange(num_classes + 1)
    # np.int was removed in NumPy 1.24; the builtin int is the correct dtype.
    histogram = np.zeros((num_classes,), dtype=int)
    for entry in dataset_dicts:
        annos = entry["annotations"]
        # Crowd annotations are excluded from the statistics.
        classes = [x["category_id"] for x in annos if not x.get("iscrowd", 0)]
        histogram += np.histogram(classes, bins=hist_bins)[0]

    N_COLS = min(6, len(class_names) * 2)

    def short_name(x):
        # make long class names shorter. useful for lvis
        if len(x) > 13:
            return x[:11] + ".."
        return x

    # Flatten into [name1, count1, name2, count2, ...].
    data = list(
        itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)])
    )
    total_num_instances = sum(data[1::2])
    data.extend([None] * (N_COLS - (len(data) % N_COLS)))
    if num_classes > 1:
        data.extend(["total", total_num_instances])
    # Materialize the rows: pformat on a raw zip_longest iterator would log
    # only the iterator's repr, not the table contents.
    data = list(itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)]))
    logging.info("Distribution of instances among all {} categories:\n".format(num_classes))
    logging.info(pformat(data))
def parse_annotation(ICDAR_path, id, len_max):
    """Parse one ICDAR task-3 annotation file into padded feature rows.

    Args:
        ICDAR_path: dataset root directory.
        id: annotation file name (e.g. ``xxx.txt``); the matching image is
            the same name with a ``jpg`` extension.
        len_max: fixed length each row is padded/truncated to.

    Returns:
        tuple: ({'texts': rows}, [ROI_x, ROI_y, im_width, im_height]).
    """
    texts = []

    anno_path = os.path.join(
        ICDAR_path, 'task3-test(347p)/task3-test(347p)/' + id)
    # NOTE(review): str.strip('txt') strips any of the characters t/x from
    # both ends, not the literal ".txt" suffix -- it works for typical ids
    # but would mangle names starting/ending with those letters; confirm.
    f_img = os.path.join(
        ICDAR_path,
        'task3-test(347p)/task3-test(347p)/' + id.strip('txt') + 'jpg')

    im = Image.open(f_img)
    im_width, im_height = im.size

    # Close the annotation file deterministically (it was leaked before).
    with open(anno_path) as f_task1:
        # In ICDAR case, the first line is our ROI coordinate (xmin, ymin)
        line_txt = f_task1.readline()
        coor = line_txt.split(',')
        ROI_x = int(coor[0].strip('\''))
        ROI_y = int(coor[1].strip('\''))

        while line_txt:
            line_txt = f_task1.readline()
            coor = line_txt.split(',')

            if coor[0] not in ('"\r\n', '"\n', '', '\r\n'):
                # Box corners relative to the ROI, normalized by image size.
                xmin = float(int(coor[0].strip('\'')) - ROI_x) / float(im_width)
                ymin = float(int(coor[1].strip('\'')) - ROI_y) / float(im_height)
                xmax = float(int(coor[4].strip('\'')) - ROI_x) / float(im_width)
                ymax = float(int(coor[5].strip('\'')) - ROI_y) / float(im_height)

                text = coor[8:]

                # 'ori_text' retains special signs which block the following
                # comparison but are useful in encoding
                ori_text = copy.deepcopy(text)
                ori_text = ','.join(ori_text)
                text_ascii = [ord(c) for c in ori_text]

                data = [xmin, ymin, xmax, ymax]
                data.extend(text_ascii)

                # Zero-pad (or truncate) each row to a fixed length.
                data_pad = [0] * len_max
                if len(data) <= len_max:
                    data_pad[:len(data)] = data
                else:
                    data_pad = data[:len_max]

                texts.append(data_pad)

    return {'texts': texts}, [ROI_x, ROI_y, im_width, im_height]
Beispiel #4
0
def print_instances_class_histogram(dataset_dicts, class_names):
    """Log a tabulated histogram of instance counts per class.

    Args:
        dataset_dicts (list[dict]): list of dataset dicts.
        class_names (list[str]): list of class names (zero-indexed).
    """

    logger = logging.getLogger(__name__)
    logger.info("Build instances class histogram")

    num_classes = len(class_names)
    hist_bins = np.arange(num_classes + 1)
    # np.int was removed in NumPy 1.24; the builtin int is the correct dtype.
    histogram = np.zeros((num_classes, ), dtype=int)
    for entry in tqdm(dataset_dicts):
        annos = entry["annotations"]
        classes = np.asarray([x["category_id"] for x in annos], dtype=int)
        if len(classes):
            assert classes.min(
            ) >= 0, f"Got an invalid category_id={classes.min()}"
            assert (
                classes.max() < num_classes
            ), f"Got an invalid category_id={classes.max()} for a dataset of {num_classes} classes"
        histogram += np.histogram(classes, bins=hist_bins)[0]

    # Two cells (name, count) per class, at most 6 columns wide.
    N_COLS = min(6, len(class_names) * 2)

    def short_name(x):
        # make long class names shorter. useful for lvis
        if len(x) > 13:
            return x[:11] + ".."
        return x

    data = list(
        itertools.chain(
            *[[short_name(class_names[i]), int(v)]
              for i, v in enumerate(histogram)]))
    total_num_instances = sum(data[1::2])
    data.extend([None] * (N_COLS - (len(data) % N_COLS)))
    if num_classes > 1:
        data.extend(["total", total_num_instances])
    data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])

    # TODO: Too many classes expected. Need to summarize histogram

    table = tabulate(
        data,
        headers=["category", "#instances"] * (N_COLS // 2),
        tablefmt="pipe",
        numalign="left",
        stralign="center",
    )
    log_first_n(
        logging.INFO,
        "Distribution of instances among all {} categories:\n".format(
            num_classes) + colored(table, "cyan"),
        key="message",
    )
Beispiel #5
0
 def build_data(self, textdata: TextData):
     """Build n-gram tensor training data for every sentence in *textdata*.

     Positions start at -context_size so the leading n-grams of each
     sentence are (presumably) padded by the helper -- confirm against
     ``get_ngram_tensors_at_position``.
     """
     data = []
     for sent_idx in range(len(textdata.sentences)):
         indexes = textdata.get_sentence_indexes(sent_idx)
         for position in range(-self.context_size, len(indexes)):
             data.append(
                 self.get_ngram_tensors_at_position(position, indexes))
     return data
Beispiel #6
0
def print_instances_class_histogram(dataset_dicts, class_names):
    """Log a tabulated histogram of instance counts per class.

    Args:
        dataset_dicts (list[dict]): list of dataset dicts.
        class_names (list[str]): list of class names (zero-indexed).
    """
    num_classes = len(class_names)
    hist_bins = np.arange(num_classes + 1)  # bin edges 0..num_classes
    # np.int was removed in NumPy 1.24; the builtin int is the correct dtype.
    histogram = np.zeros((num_classes, ), dtype=int)
    for entry in dataset_dicts:
        annos = entry["annotations"]  # only iscrowd == 0 entries count
        classes = [x["category_id"] for x in annos if not x.get("iscrowd", 0)]
        # np.histogram returns (per-bin counts, bin edges); accumulate counts.
        histogram += np.histogram(classes, bins=hist_bins)[0]

    N_COLS = min(6, len(class_names) * 2)  # table width in cells

    def short_name(x):
        # make long class names shorter. useful for lvis
        if len(x) > 13:
            return x[:11] + ".."
        return x

    # Flatten into [name1, count1, name2, count2, ...] with shortened names.
    data = list(
        itertools.chain(
            *[[short_name(class_names[i]), int(v)]
              for i, v in enumerate(histogram)]))
    # Counts occupy the odd positions of the flattened list.
    total_num_instances = sum(data[1::2])
    data.extend([None] * (N_COLS - (len(data) % N_COLS)))  # pad to N_COLS multiple
    if num_classes > 1:
        data.extend(["total", total_num_instances])
    # Re-shape the flat list into N_COLS columns; zip_longest pads shorter
    # columns with None.
    data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
    table = tabulate(
        data,
        headers=["category", "#instances"] * (N_COLS // 2),
        tablefmt="pipe",
        numalign="left",
        stralign="center",
    )
    log_first_n(
        logging.INFO,
        "Distribution of instances among all {} categories:\n".format(
            num_classes) + colored(table, "cyan"),
        key="message",
    )
Beispiel #7
0
    def build_vocab(self, min_freq=0, max_freq=sys.maxsize):
        """Build the vocabulary (with <pad>/<eos> prepended), encode it as
        tok2id/id2tok maps and persist everything to ``vocab.pkl``.
        """
        with open(os.path.join(self.data_dir, 'train.txt'), 'r') as fn:
            data = fn.readlines()

        # For lambada the test/valid splits also contribute to the vocab.
        if 'lambada' in self.data_dir:
            for split in ('test.txt', 'valid.txt'):
                with open(os.path.join(self.data_dir, split), 'r') as fn:
                    data.extend(fn.readlines())

        print('building vocab ...')
        self.vocab = defaultdict(int)
        self.tok2id = {}
        self.id2tok = []

        # Count token frequencies.
        for line in tqdm(data):
            for tok in line.strip().split():
                self.vocab[tok] += 1

        # Keep only tokens inside the requested frequency band.
        self.vocab = {
            tok: cnt
            for tok, cnt in self.vocab.items()
            if min_freq <= cnt <= max_freq
        }
        # sort vocab in case of using adaptive softmax
        self.vocab = sorted(self.vocab.items(),
                            key=lambda kv: kv[1], reverse=True)
        print(self.vocab[:10])

        if 'lambada' in self.data_dir:
            self.vocab = self.vocab[:60000]
            self.vocab.append(('<unk>', 0))

        self.id2tok = ['<pad>', '<eos>'] + [tok for tok, _ in self.vocab]
        self.tok2id = {tok: i for i, tok in enumerate(self.id2tok)}
        self.vocab_size = len(self.id2tok)

        print('end building vocab ...')
        print('vocab size', len(self.tok2id))
        with open(os.path.join(self.data_dir, 'vocab.pkl'), 'wb') as fn:
            pickle.dump(
                {
                    'id2tok': self.id2tok,
                    'tok2id': self.tok2id,
                    'vocab_size': self.vocab_size
                }, fn)
Beispiel #8
0
    def __getitem__(self, index):
        """Return one RRI sample: the raw series (fixed to 512 values), its
        reversal, three moving-average-filtered variants (orders 1-3, forward
        and reversed), the label, subject id and 9 precomputed HRV features.

        NOTE(review): the /1000 suggests self._data is in milliseconds and is
        converted to seconds here -- confirm against the data loader.
        """
        assert (index < self._dataset_size)

        #if self._is_for_train:  # random
        #    index = random.randint(0, self._dataset_size-1)

        # get sample data
        data = self._data[index] / 1000
        data = list(data)

        data_reverse = copy.deepcopy(data)
        data_reverse.reverse()

        # Moving-average smoothing at three window orders.
        filt_rri1 = list(moving_average(RRi(data), order=1))
        filt_rri2 = list(moving_average(RRi(data), order=2))
        filt_rri3 = list(moving_average(RRi(data), order=3))

        # Reversed copies of each filtered series.
        filt_rri1_reverse = copy.deepcopy(filt_rri1)
        filt_rri2_reverse = copy.deepcopy(filt_rri2)
        filt_rri3_reverse = copy.deepcopy(filt_rri3)
        filt_rri1_reverse.reverse()
        filt_rri2_reverse.reverse()
        filt_rri3_reverse.reverse()

        order_data = [filt_rri1, filt_rri2, filt_rri3]
        order_data_reverse = [
            filt_rri1_reverse, filt_rri2_reverse, filt_rri3_reverse
        ]

        label = int(self._label[index])
        subject = self._subject[index]

        # Pre-computed time- and frequency-domain HRV statistics.
        mean = self._mean_train[index]
        sdnn = self._sdnn_train[index]
        pnn50 = self._pnn50_train[index]
        rmssd = self._rmssd_train[index]
        lnrmssd = self._lnrmssd_train[index]
        vlf = self._vlf_train[index]
        lf = self._lf_train[index]
        hf = self._hf_train[index]
        rlh = self._rlh_train[index]

        features = list(np.stack((mean, sdnn, pnn50, rmssd, lnrmssd, \
                             vlf, lf, hf, rlh )))

        # Fix the raw series to exactly 512 samples: truncate long series,
        # zero-pad short ones.
        makeup_length = 512 - len(data)
        if len(data) > 512:
            data = data[:512]
        else:
            data.extend(0 for _ in range(makeup_length))

        return data, data_reverse, order_data, order_data_reverse, label, subject, features
Beispiel #9
0
def print_instances_class_histogram(dataset_dicts,
                                    class_names,
                                    attribute='annotations'):
    """Log a tabulated histogram of instance counts per class.

    Args:
        dataset_dicts (list[dict]): list of data dicts.
        class_names (list[str]): list of class names (zero-indexed).
        attribute (str): which per-entry list to count: 'annotations'
            (category_id, crowds excluded) or 'annotations2' (category2_id).
    """
    num_classes = len(class_names)
    hist_bins = np.arange(num_classes + 1)
    # np.int was removed in NumPy 1.24; the builtin int is the correct dtype.
    histogram = np.zeros((num_classes, ), dtype=int)
    for entry in dataset_dicts:
        classes = []
        annos = entry[attribute]
        if attribute == 'annotations':
            classes = [
                x["category_id"] for x in annos if not x.get("iscrowd", 0)
            ]
        elif attribute == 'annotations2':
            classes = [x["category2_id"] for x in annos]
        histogram += np.histogram(classes, bins=hist_bins)[0]

    N_COLS = min(6, len(class_names) * 2)

    def short_name(x):
        # make long class names shorter. useful for lvis
        if len(x) > 13:
            return x[:11] + ".."
        return x

    # Flatten into [name1, count1, ...], pad to N_COLS, then reshape into
    # columns for tabulate.
    data = list(
        itertools.chain(
            *[[short_name(class_names[i]), int(v)]
              for i, v in enumerate(histogram)]))
    total_num_instances = sum(data[1::2])
    data.extend([None] * (N_COLS - (len(data) % N_COLS)))
    if num_classes > 1:
        data.extend(["total", total_num_instances])
    data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)])
    table = tabulate(
        data,
        headers=["category", "#instances"] * (N_COLS // 2),
        tablefmt="pipe",
        numalign="left",
        stralign="center",
    )
    log_first_n(
        logging.INFO,
        "Distribution of instances among all {} categories:\n".format(
            num_classes) + colored(table, "cyan"),
        key="message",
    )
Beispiel #10
0
 def generate(self):
     """Sample measurement outcomes in self.nbasis random local bases.

     Returns:
         tuple: (data, basis) -- samples and the basis parameters that
         produced each of them.
     """
     data, basis = [], []
     for _ in range(self.nbasis):
         # One random Pauli rotation (x/y/z) per site.
         ops = MBOp(choice([sigmax, sigmay, sigmaz])
                    for _ in range(self.n))
         rotated = ops.invtrans(self.state)
         sampler = STMetropolis(
             proposal=lambda x: abs(rotated[bin(x)]) ** 2,
             size=self.size)
         data.extend(sampler.sample(itr=self.itr, burn=self.burn,
                                    thin=self.thin))
         # Repeat the basis parameters once per collected sample.
         n_collected = len(sampler.collector)
         basis.extend(torch.FloatTensor(ops.params()).resize_(2 * self.n)
                      for _ in range(n_collected))
     return data, basis
Beispiel #11
0
    def load_data(self):
        """Collect one weight vector per checkpoint of the current task and
        cache the list to ``../pickles/dataset.pkl``.
        """
        task_tag = 'task_' + str(self.task)
        data = []
        for model_name in os.listdir(self.root):
            model_path = os.path.join(self.root, model_name)
            if task_tag not in model_path:
                continue
            log('\nExtracting weights from %s' % model_path)
            state = torch.load(model_path)['model_state_dict']
            data.append(self.kernels_to_vector(state))

        torch.save(data, os.path.join(self.root, '../pickles/dataset.pkl'))
        return data
Beispiel #12
0
 def __init__(self, configs):
     """Load a cached cifar10 split (a dict mapping label -> list of
     images) into a flat (N, 32, 32, 3) HWC array plus a parallel label
     list."""
     split = configs['split']
     root = configs['root']
     self.transform = configs['transform']
     self.split = split
     data_dict = torch.load(
         os.path.join(root, 'data', 'cifar10', split + '.pth'))
     labels, data = [], []
     for label, samples in data_dict.items():
         labels.extend([label] * len(samples))
         data.extend(samples)
     print('Loaded %d data, %d labels' % (len(labels), len(data)))
     flat_rows = [x.reshape(1, -1) for x in data]
     self.data = np.concatenate(flat_rows)
     print('Concatenated shape:', self.data.shape)
     # NCHW -> NHWC
     self.data = self.data.reshape((-1, 3, 32, 32)).transpose((0, 2, 3, 1))
     self.labels = labels
Beispiel #13
0
def get_cafe_data():
    '''
    Returns (data, labels) where data is an n x d tensor and labels is an
    n x 1 tensor; the label of each image is the index of its expression
    list in load_cafe().
    '''
    images = load_cafe()

    data, labels = [], []
    for class_idx, expr_list in enumerate(images):
        for image in expr_list:
            data.append(transform(image).reshape(-1))
        labels.extend([class_idx] * len(expr_list))

    return torch.stack(data), torch.tensor(labels)
def receive(client_socket, size):
    """
    Receive ``size`` doubles (8 bytes each) from a socket client.

    Args:
        client_socket: connected socket to read from.
        size: number of float values to receive.

    Returns:
        The decoded float array.

    Raises:
        ConnectionError: if the peer closes the connection before all bytes
            arrive. An empty recv() means EOF, so the original
            ``continue`` spun forever on a closed socket.
    """
    n = 8 * size
    data = bytearray()
    while len(data) < n:
        packet = client_socket.recv(n - len(data))
        if not packet:
            raise ConnectionError(
                'socket closed after %d of %d bytes' % (len(data), n))
        data.extend(packet)
    assert (n == len(data))
    res = decode_float_array(data, size)
    return res
Beispiel #15
0
def my_collate(batch, frame_size, fixed_frame_num):
    '''
    Break an utterance to multiple frames.
    The last frame is dropped if len(frame) < frame_size.
    Utterances shorter than one frame are dropped entirely.
    '''
    data, target = [], []
    for mat, label in batch:
        if mat.shape[0] < frame_size:
            continue
        frames, frame_labels = breakIntoFrames(mat, label, frame_size,
                                               fixed_frame_num)
        data.extend(frames)
        target.extend(frame_labels)

    return [torch.LongTensor(data), torch.LongTensor(target)]
Beispiel #16
0
    def load_json(json_path,
                  x='feat_length',
                  y='token_length',
                  x_range=(1, 9999),
                  y_range=(1, 999),
                  rate=(1, 99)):
        """Load samples from a json file (or every ``*.json`` under a
        directory) and filter them by input length, output length and their
        ratio.

        Args:
            json_path: path to a json file, or a directory to walk.
            x / y: sample keys holding the input / output lengths.
            x_range / y_range: inclusive (min, max) bounds on the lengths.
            rate: inclusive (min, max) bounds on len_x / len_y.

        Returns:
            list: the samples that survive all three filters.
        """
        try:
            # json_path is a single file
            with open(json_path) as f:
                data = json.load(f)
        except (OSError, ValueError):
            # json_path is a dir containing *.json files. Opening a
            # directory raises OSError; JSONDecodeError is a ValueError.
            data = []
            for dirpath, _, filenames in os.walk(json_path):
                for name in filenames:
                    if name.endswith('.json'):
                        filename = os.path.join(dirpath, name)
                        print('loading json file :', filename)
                        with open(filename) as json_file:
                            add = json.load(json_file)
                            data.extend(add)
                        print('loaded {} samples'.format(len(add)))

        # Collect the indices of out-of-range samples.
        list_to_pop = []
        for i, sample in enumerate(data):
            len_x = sample[x]
            len_y = sample[y]
            if not (x_range[0] <= len_x <= x_range[1]) or \
               not (y_range[0] <= len_y <= y_range[1]) or \
               not (rate[0] <= (len_x / len_y) <= rate[1]):
                list_to_pop.append(i)

        # filter: pop from the back so earlier indices stay valid
        print('filtered {}/{} samples\n'.format(len(list_to_pop), len(data)))
        for i in reversed(list_to_pop):
            data.pop(i)

        return data
Beispiel #17
0
    def __init__(self,
                 json_path,
                 reverse=False,
                 feat_range=(1, 99999),
                 label_range=(1, 100),
                 rate_in_out=(4, 999)):
        """Load samples from a json file (or all *.json under a directory),
        filter them by feature/label length and their ratio, and store them
        sorted by feature length.

        Args:
            json_path: a json file path, or a directory to search.
            reverse: if True, sort from longest to shortest feature.
            feat_range: inclusive (min, max) bounds on sample['feat_length'].
            label_range: inclusive (min, max) bounds on sample['token_length'].
            rate_in_out: inclusive bounds on feat_length / token_length.
        """
        try:
            # json_path is a single file
            with open(json_path) as f:
                data = json.load(f)
        except:
            # json_path is a dir where the *.json files live.
            # NOTE(review): the bare except treats *any* failure above as
            # "must be a directory" -- a corrupt json file lands here too.
            data = []
            for dir, _, fs in os.walk(json_path):  # walk every sub-directory
                for f in fs:
                    if f.endswith('.json'):  # keep only ".json" files
                        filename = os.path.join(dir, f)
                        print('loading json file :', filename)
                        with open(filename) as f:
                            add = json.load(f)
                            data.extend(add)
                        print('loaded {} samples'.format(len(add)))

        # filter: collect indices of out-of-range samples
        list_to_pop = []
        for i, sample in enumerate(data):
            len_x = sample['feat_length']
            len_y = sample['token_length']
            if not (feat_range[0] <= len_x <= feat_range[1]) or \
               not (label_range[0] <= len_y <= label_range[1]) or \
               not (rate_in_out[0] <= (len_x / len_y) <= rate_in_out[1]):
                list_to_pop.append(i)
        print('filtered {}/{} samples\n'.format(len(list_to_pop), len(data)))
        # pop from the back so the remaining indices stay valid
        list_to_pop.reverse()
        [data.pop(i) for i in list_to_pop]

        self.data = sorted(data, key=lambda x: float(x["feat_length"]))
        if reverse:
            self.data.reverse()
Beispiel #18
0
 def __getitem__(self, index):
     """Fetch the index-th clip from each of the three video lists,
     concatenate their frames, and apply the transform. The returned label
     is the one from the last list (the three presumably agree -- confirm
     against get_item_once)."""
     data = []
     label0 = None
     for video_list in (self.video_list0, self.video_list1,
                        self.video_list2):
         clip, label0 = self.get_item_once(index, video_list)
         data.extend(clip)
     process_data = self.transform(data)
     return process_data, label0
Beispiel #19
0
    def __init__(self, scene, data_path, train, transform=None,
                 target_transform=None, real=False, skip_images=False, seed=7,
                 undistort=False, vo_lib='stereo', data_dir=None, unsupervise=False,
                 config=None):
        """
        :param scene: e.g. 'full' or 'loop'. collection of sequences.
        :param data_path: Root RobotCar data directory.
        Usually '../data/deepslam_data/RobotCar'
        :param train: flag for training / validation
        :param transform: Transform to be applied to images
        :param target_transform: Transform to be applied to poses
        :param real: it determines load ground truth pose or vo pose
        :param skip_images: return None images, only poses
        :param seed: random seed
        :param undistort: whether to undistort images (slow)
        :param vo_lib: Library to use for VO ('stereo' or 'gps')
        (`gps` is a misnomer in this code - it just loads the position information
        from GPS)
        :param data_dir: indicating where to load stats.txt file(to normalize image&pose)
        :param unsupervise: load training set as supervise or unsupervise
        """
        np.random.seed(seed)
        self.train = train
        self.transform = transform
        self.target_transform = target_transform
        self.skip_images = skip_images
        self.undistort = undistort
        base_dir = osp.expanduser(osp.join(data_path, scene))
        self.config = config

        # NOTE: `has_key` is Python-2 only; getattr / `in` keep this working
        # on Python 3.
        if getattr(self.config, 'new_split', False):
            print("use new split dataset")
            if train:
                split_filename = osp.join(base_dir, 'train_split.txt')
            else:
                split_filename = osp.join(base_dir, 'test_split.txt')

            with open(split_filename, 'r') as f:
                seqs = [l.rstrip() for l in f if not l.startswith('#')]

            # Poses come from the union of the train and test pose files;
            # the first 3 lines of each file are headers.
            pose_filename = osp.join(base_dir, "dataset_train.txt")
            pose_dict = {}
            with open(pose_filename, 'r') as f:
                data = f.readlines()[3:]

            pose_filename = osp.join(base_dir, "dataset_test.txt")
            with open(pose_filename, 'r') as f:
                data.extend(f.readlines()[3:])

                imgs = [
                    l.split(' ')[0] for l in data if not l.startswith('#')
                ]
                ps = np.asarray([
                    [float(num) for num in l.split(' ')[1:]]
                    for l in data if not l.startswith('#')
                ], dtype=np.float32)

                # 6-dof pose: translation + log quaternion.
                poses = np.zeros((ps.shape[0], 6))
                poses[:, :3] = ps[:, :3]
                poses[:, 3:] = np_qlog_t(ps[:, 3:])

                for idx, img_name in enumerate(imgs):
                    pose_dict[img_name] = poses[idx, :]

            self.poses = np.empty((0, 6))
            self.imgs = []
            for seq in seqs:
                img_names = [img for img in imgs if img.startswith(seq)]

                poses = np.asarray([
                    pose_dict[img_name] for img_name in img_names
                    if img_name in pose_dict
                ])
                self.imgs.extend([osp.join(base_dir, img_name) for img_name in img_names])
                self.poses = np.vstack((self.poses, poses))

        else:
            if train:
                if unsupervise:
                    split_filename = osp.join(base_dir, 'unsupervised_train_split.txt')
                else:
                    split_filename = osp.join(base_dir, 'dataset_train.txt')
            else:
                split_filename = osp.join(base_dir, 'dataset_test.txt')
            with open(split_filename, 'r') as f:
                data = f.readlines()
                self.imgs = [
                    osp.join(
                        base_dir,
                        l.split(' ')[0]
                    ) for l in data[3:] if not l.startswith('#')
                ]
                ps = np.asarray([
                    [float(num) for num in l.split(' ')[1:]]
                    for l in data[3:] if not l.startswith('#')
                ], dtype=np.float32)

            self.poses = np.zeros((ps.shape[0], 6))
            self.poses[:, :3] = ps[:, :3]
            self.poses[:, 3:] = np_qlog_t(ps[:, 3:])

        self.mask_sampling = self.config.mask_sampling
        if self.mask_sampling:
            muimg = read_grayscale_image(osp.join(base_dir, self.config.mu_mask_name))
            self.muimg = torch.tensor(muimg.transpose(2, 0, 1)).type(torch.FloatTensor)
            self.sigmaimg = self.muimg * (1 - self.muimg)

        # Normalize translations with dataset statistics: computed and saved
        # while training on ground-truth poses, loaded otherwise.
        pose_stats_filename = osp.join(data_dir, 'pose_stats.txt')
        if train and not real:
            mean_t = np.mean(self.poses[:, :3], axis=0)
            std_t = np.std(self.poses[:, :3], axis=0)
            np.savetxt(pose_stats_filename, np.vstack((mean_t, std_t)), fmt='%8.7f')
            print("Saved")
        else:
            mean_t, std_t = np.loadtxt(pose_stats_filename)

        self.poses[:, :3] -= mean_t
        self.poses[:, :3] /= std_t

        # convert the pose to translation + log quaternion, align, normalize
        self.gt_idx = np.asarray(range(len(self.poses)))

        # camera model and image loader
        self.im_loader = partial(load_image)
    def __getitem__(self, index):
        """Build one windowed residue-level sample.

        Returns whole-sequence one-hot / PSSM / DSSP arrays (each padded or
        truncated to max_seq_len, with a leading channel axis) plus a
        flattened per-residue encoding (20 one-hot + 20 PSSM + 9 DSSP) of the
        window of radius ``window_size`` around position ``ii``, and the
        label at the window center.
        """
        count, id_idx, ii, dset, protein_id, seq_length = self.protein_list[
            index]
        window_size = self.window_size
        id_idx = int(id_idx)
        win_start = ii - window_size
        win_end = ii + window_size
        seq_length = int(seq_length)
        label_idx = (win_start + win_end) // 2

        # Whole-sequence one-hot amino-acid features, truncated to
        # max_seq_len and zero-padded to exactly that length.
        all_seq_features = []
        seq_len = 0
        for idx in self.all_sequences[id_idx][:self.max_seq_len]:
            acid_one_hot = [0 for i in range(20)]
            acid_one_hot[idx] = 1
            all_seq_features.append(acid_one_hot)
            seq_len += 1
        while seq_len < self.max_seq_len:
            acid_one_hot = [0 for i in range(20)]
            all_seq_features.append(acid_one_hot)
            seq_len += 1

        # PSSM profile, zero-padded the same way (20 values per residue).
        all_pssm_features = self.all_pssm[id_idx][:self.max_seq_len]
        seq_len = len(all_pssm_features)
        while seq_len < self.max_seq_len:
            zero_vector = [0 for i in range(20)]
            all_pssm_features.append(zero_vector)
            seq_len += 1

        # DSSP features, zero-padded the same way (9 values per residue).
        all_dssp_features = self.all_dssp[id_idx][:self.max_seq_len]
        seq_len = len(all_dssp_features)
        while seq_len < self.max_seq_len:
            zero_vector = [0 for i in range(9)]
            all_dssp_features.append(zero_vector)
            seq_len += 1

        # Window positions before the start of the sequence: all-zero rows.
        local_features = []
        labels = []
        while win_start < 0:
            data = []
            acid_one_hot = [0 for i in range(20)]
            data.extend(acid_one_hot)

            pssm_zero_vector = [0 for i in range(20)]
            data.extend(pssm_zero_vector)

            dssp_zero_vector = [0 for i in range(9)]
            data.extend(dssp_zero_vector)

            local_features.extend(data)
            win_start += 1

        # In-sequence window positions: one-hot + PSSM + DSSP per residue.
        valid_end = min(win_end, seq_length - 1)
        while win_start <= valid_end:
            data = []
            idx = self.all_sequences[id_idx][win_start]

            acid_one_hot = [0 for i in range(20)]
            acid_one_hot[idx] = 1
            data.extend(acid_one_hot)

            pssm_val = self.all_pssm[id_idx][win_start]
            data.extend(pssm_val)

            # DSSP may be missing for this residue; fall back to zeros.
            try:
                dssp_val = self.all_dssp[id_idx][win_start]
            except:
                dssp_val = [0 for i in range(9)]
            data.extend(dssp_val)

            local_features.extend(data)
            win_start += 1

        # Window positions past the end of the sequence: all-zero rows.
        while win_start <= win_end:
            data = []
            acid_one_hot = [0 for i in range(20)]
            data.extend(acid_one_hot)

            pssm_zero_vector = [0 for i in range(20)]
            data.extend(pssm_zero_vector)

            dssp_zero_vector = [0 for i in range(9)]
            data.extend(dssp_zero_vector)

            local_features.extend(data)
            win_start += 1

        # Label at the window center.
        label = self.all_label[id_idx][label_idx]
        label = np.array(label, dtype=np.float32)

        # Add a leading channel axis to each whole-sequence array.
        all_seq_features = np.stack(all_seq_features)
        all_seq_features = all_seq_features[np.newaxis, :, :]
        all_pssm_features = np.stack(all_pssm_features)
        all_pssm_features = all_pssm_features[np.newaxis, :, :]

        all_dssp_features = np.stack(all_dssp_features)
        all_dssp_features = all_dssp_features[np.newaxis, :, :]
        local_features = np.stack(local_features)

        return all_seq_features, all_pssm_features, all_dssp_features, local_features, label
Beispiel #21
0
    def __init__(self, hp, mode):
        """Load the ShoeV2 sketch-coordinate data, filter long sequences,
        split train/test lists and precompute scale-normalized offset
        (delta-x/delta-y) coordinates.

        Args:
            hp: hyper-parameter namespace; mutated here (root_dir,
                dataset_name, seq_len_threshold, max_seq_len,
                average_seq_len are (re)assigned).
            mode: split name (stored; not otherwise used in this method).
        """
        super(Photo2Sketch_Dataset, self).__init__()

        self.hp = hp
        self.mode = mode
        # NOTE(review): hard-coded machine-specific path overrides whatever
        # the caller put in hp.root_dir.
        hp.root_dir = '/home/media/On_the_Fly/Code_ALL/Final_Dataset'
        hp.dataset_name = 'ShoeV2'
        hp.seq_len_threshold = 251

        # coordinate_path = os.path.join(hp.root_dir, hp.dataset_name , hp.dataset_name + '_Coordinate')
        self.root_dir = os.path.join(hp.root_dir, hp.dataset_name)

        # Pre-simplified (RDP) stroke coordinates, keyed by sketch name.
        with open('./preprocess/ShoeV2_RDP_3', 'rb') as fp:
            self.Coordinate = pickle.load(fp)

        # Keep only sketches with fewer than 81 points.
        coordinate_refine = {}
        seq_len = []
        for key in self.Coordinate.keys():
            if len(self.Coordinate[key]) < 81:
                coordinate_refine[key] = self.Coordinate[key]
                seq_len.append(len(self.Coordinate[key]))
        self.Coordinate = coordinate_refine
        hp.max_seq_len = max(seq_len)
        hp.average_seq_len = int(np.round(np.mean(seq_len) + np.std(seq_len)))

        # greater_than_average = 0
        # for seq in seq_len:
        #     if seq > self.hp.average_len:
        #         greater_than_average +=1

        # Split by the 'train'/'test' tag embedded in the key name.
        self.Train_Sketch = [
            x for x in self.Coordinate
            if ('train' in x) and (len(self.Coordinate[x]) < 130)
        ]  # separating trains
        self.Test_Sketch = [
            x for x in self.Coordinate
            if ('test' in x) and (len(self.Coordinate[x]) < 130)
        ]  # separating tests

        self.train_transform = get_transform('Train')
        self.test_transform = get_transform('Test')

        # # seq_len = []
        # # for key in self.Coordinate.keys():
        # #     seq_len += [len(self.Coordinate[key])]
        # # plt.hist(seq_len)
        # # plt.savefig('histogram of number of Coordinate Points.png')
        # # plt.close()
        # # hp.max_seq_len = max(seq_len)
        # hp.max_seq_len = 130
        """" Preprocess offset coordinates """
        # Convert absolute points to per-step offsets, then normalize both
        # axes by the global standard deviation of all offsets.
        self.Offset_Coordinate = {}
        for key in self.Coordinate.keys():
            self.Offset_Coordinate[key] = to_delXY(self.Coordinate[key])
        data = []
        for sample in self.Offset_Coordinate.values():
            data.extend(sample[:, 0])
            data.extend(sample[:, 1])
        data = np.array(data)
        scale_factor = np.std(data)

        for key in self.Coordinate.keys():
            self.Offset_Coordinate[key][:, :2] /= scale_factor
        """" <<< Preprocess offset coordinates >>> """
        """" <<<           Done                >>> """
Beispiel #22
0
def make_data(root_path, annotation_path, allow_reverse, mirror, balance,
              balance_proportions, subset, sample_duration, required_overlap,
              shuffle):
    """Build a list of fixed-length clip samples from per-track annotations.

    Args:
        root_path: directory containing one sub-directory of frames per track.
        annotation_path: file readable by ``_load_list``; each entry is
            ``(track_path, label, start, end, frame, subset, hard_data)``.
        allow_reverse: also add time-reversed copies (every action sample,
            background samples with probability 0.5).
        mirror: also add a horizontally mirrored copy of every sample.
        balance: randomly subsample so expected label counts are equal.
        balance_proportions: optional per-label multipliers applied on top of
            the balancing probabilities, or ``None`` for uniform balancing.
        subset: ``'train'``, ``'val'`` (alias ``'validation'``) or ``'test'``.
        sample_duration: number of frames per sample.
        required_overlap: fraction of ``sample_duration`` an action must cover
            for the sample to take that action's label.
        shuffle: shuffle the resulting list in place before returning.

    Returns:
        list of dicts with keys 'path', 'label', 'frame_indices', 'mirror'.
    """
    # BUG FIX: the original `assert (cond, msg)` asserted a non-empty tuple,
    # which is always truthy, so the validation never fired.  Also, the alias
    # map only contained 'validation' and 'train', so the supposedly valid
    # 'val'/'test' values crashed with KeyError.  Normalise the alias first,
    # then validate for real.
    subset = {'validation': 'val'}.get(subset, subset)
    assert subset in ('train', 'val', 'test'), \
        'subset "{}" is not "train", "val" or "test"'.format(subset)

    frame_format = lambda x: str(x).zfill(4) + '.png'

    data = []
    action_list = _load_list(annotation_path)
    local_track_action_dict = defaultdict(list)
    for action in action_list:
        # Group annotations by their track path (field 0).
        local_track_action_dict[action[0]].append(action[1:])

    for local_track_path, actions in tqdm(local_track_action_dict.items(),
                                          desc='Creating a dataset'):

        # Sort the list of actions by the starting frame.
        actions.sort(key=lambda a: a[1])
        # Field -2 of an annotation is its subset tag; skip other splits.
        if subset != actions[0][-2]:
            continue
        frame_dir = os.path.join(root_path, local_track_path)
        files = os.listdir(frame_dir)

        # Split the track's frames into consecutive fixed-length samples.
        for i in range(len(files) // sample_duration):
            use_sample = True
            sample_start = i * sample_duration + 1
            sample_end = min((i + 1) * sample_duration, len(files))

            # Make sure all the frames in the sample are saved properly;
            # if any is missing, drop the whole sample.
            frame_indices = list(range(sample_start, sample_end + 1))
            for index in frame_indices:
                frame_path = os.path.join(root_path, local_track_path,
                                          frame_format(index))
                if not os.path.exists(frame_path):
                    use_sample = False
                    break

            if not use_sample:
                continue

            # Label the sample with the first action overlapping it by more
            # than `required_overlap` of the sample duration.
            # BUG FIX: the original unpacking reused the name `subset`,
            # clobbering the function parameter and corrupting the split
            # filter above after the first labelled sample; throwaway names
            # are used for the fields this loop does not need.
            action_label = 0
            for (label, start, end, _frame, _split, _hard) in actions:
                action_overlap = max(
                    0, end - sample_start +
                    1 if sample_start > start else sample_end - start + 1)
                if action_overlap > sample_duration * required_overlap:
                    action_label = label
                    break

            data.append({
                'path': frame_dir,
                'label': action_label,
                'frame_indices': frame_indices,
                'mirror': False
            })

    # Optionally add time-reversed copies: every action sample, and
    # background (label 0) samples with probability 0.5.
    reverse_data = []
    if allow_reverse:
        for sample in tqdm(data, desc='Reversing the data'):
            reverse = True if sample['label'] != 0 else random.random() > 0.5
            if reverse:
                reverse_data.append(_reverse_sample(sample))
    data.extend(reverse_data)

    # Optionally add a mirrored copy of every sample (including reversed ones).
    mirror_data = []
    if mirror:
        for sample in tqdm(data, desc='Adding mirror flip'):
            mirror_data.append(_mirror_sample(sample))
    data.extend(mirror_data)

    labels = {i['label'] for i in data}

    count_labels = {
        label: sum(1 for i in data if i['label'] == label)
        for label in labels
    }
    print('Dataset is generated. Label counts are:', count_labels)

    if balance:
        # Keep each sample with probability rarest_count / its_label_count
        # (optionally scaled per label) so expected counts come out equal.
        required = min(count_labels.values())

        if balance_proportions is not None:
            sampling_probabilities = {
                label: (required / value) * balance_proportions[label]
                for label, value in count_labels.items()
            }
        else:
            sampling_probabilities = {
                label: required / value
                for label, value in count_labels.items()
            }

        balanced_data = []
        for sample in tqdm(data, desc='Balancing dataset'):
            if random.random() < sampling_probabilities[sample['label']]:
                balanced_data.append(sample)

        data = balanced_data
        count_labels = {
            label: sum(1 for i in data if i['label'] == label)
            for label in labels
        }
        print('Balanced dataset is generated. Label counts are:', count_labels)
    if shuffle:
        random.shuffle(data)
    return data
Beispiel #23
0
def search(dirname, data):
    """Load datasets for every entry under `dirname`, extending `data` in place.

    For each entry of `dirname` a glob pattern `<dirname>/<entry>/*.png`
    is built and passed to `Get_Dataset`; a progress line is printed per
    pattern.
    """
    for entry in os.listdir(dirname):
        pattern = os.path.join(dirname, entry, '*.png')
        data.extend(Get_Dataset(pattern))
        print(pattern + ' Load!')
# Merge every --model argument into `model`: a dotted `key=value` override,
# or a YAML file (whose `model` section, when present, replaces the document).
for string in args.model:
    if '=' not in string:
        with open(string, 'r') as f:
            update = yaml.safe_load(f)
        # If the yaml file contains an entry with key `model` use that one instead
        if 'model' in update:
            update = update['model']
    else:
        update = parse_dotted(string)
    update_rec(model, update)

# Data from --data args: a directory contributes all of its *.pt files,
# a single *.pt file contributes itself, anything else is rejected.
for path in args.data:
    path = Path(path).expanduser().resolve()
    if path.is_file() and path.suffix == '.pt':
        data.append(path)
    elif path.is_dir():
        data.extend(path.glob('*.pt'))
    else:
        raise ValueError(f'Invalid data: {path}')

# Merge every --options argument into `options`: a dotted `key=value`
# override, or a whole YAML file.
for string in args.options:
    if '=' not in string:
        with open(string, 'r') as f:
            update = yaml.safe_load(f)
    else:
        update = parse_dotted(string)
    update_rec(options, update)

# Resolving paths