Code Example #1
 def __getitem__(self, index):
     feature_path = self.imgs[index]
     label = self.labels[index]
     # Load the saved features
     data = np.load(feature_path)
     data = data.astype(np.float32)  # astype returns a new array; assign it back
     return data, label
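The __getitem__ methods in these examples are typically defined on a torch.utils.data.Dataset subclass and consumed through a DataLoader. A minimal, self-contained sketch of that pattern follows; the FeatureDataset name and the placeholder file paths are assumptions made for illustration, not part of the example above.

# Minimal sketch (assumed names) of how a __getitem__ like the one above
# plugs into a PyTorch DataLoader.
import numpy as np
from torch.utils.data import Dataset, DataLoader

class FeatureDataset(Dataset):          # hypothetical stand-in for the class above
    def __init__(self, paths, labels):
        self.imgs = paths
        self.labels = labels

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        data = np.load(self.imgs[index]).astype(np.float32)
        return data, self.labels[index]

# Usage (file paths are placeholders):
# dataset = FeatureDataset(['feat_0.npy', 'feat_1.npy'], [0, 1])
# for batch, labels in DataLoader(dataset, batch_size=2, shuffle=True):
#     print(batch.dtype, batch.shape, labels)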
Code Example #2
File: _3_CNN.py, Project: leejeyeol/Subjects
 def __getitem__(self, item):
     rawdata = self.data[[self.loader[item]]]
     data = np.reshape(rawdata[0][1:], [1, 28, 28])
     label = int(rawdata[0][0])
     data = data.astype(float)  # assign the result; astype does not cast in place
     data = torch.FloatTensor(data)
     return data, label
Code Example #3
 def __getitem__(self, index):
     label = self.labels[index]
     all_data = []
     for i in range(len(self.paths)):
         feature_path = self.paths[i][index]
         data = np.load(feature_path)
         data = data.astype(np.float32)  # assign the result; astype does not cast in place
         all_data.append(data)
     return all_data, label
Code Example #4
    def __getitem__(self, index):
        feature_path = self.imgs[index]
        label = self.labels[index]

        # Load the saved features
        data = np.load(feature_path)
#         data = np.where(data > 0, 1, 0)
        data = np.where(data > data[data.argsort()[::-1][40]], 1.0, 0.0)
        data = data.astype(np.float32)  # assign the result; astype does not cast in place

        return data, label
Code Example #5
File: lstm.py, Project: hyeness/DetoxiPy
def preprocess(data, rem_bias):
    '''
    Cleans comment text by:
    1) removing selected punctuation marks,
    2) homogenizing contractions,
    3) homogenizing selected proper names,
    4) correcting selected misspellings
    '''

    data = data.astype(str).apply(lambda x: pre.clean_special_chars(x))
    data = data.astype(str).apply(
        lambda x: pre.clean_contractions_and_spelling(x))
    if rem_bias:
        data = data.astype(str).apply(lambda x: pre.replace_identities(x))
    return data
Code Example #6
    def cluster(self, data, verbose=False):
        """Performs k-means clustering.
            Args:
                data (np.array N * dim): data to cluster
        """
        end = time.time()

        # PCA-reducing, whitening and L2-normalization
        if data.shape[1] >= 256:
            xb = preprocess_features(data)
        else:
            xb = data.astype('float32')
            row_sums = np.linalg.norm(xb, axis=1)
            xb = xb / row_sums[:, np.newaxis]
        # print(xb.shape)
        # exit()
        # cluster the data
        I, loss = run_kmeans(xb, self.k, verbose)

        self.feature = xb
        self.I = I

        self.images_lists = [[] for i in range(self.k)]
        for i in range(len(data)):
            self.images_lists[I[i]].append(i)

        if verbose:
            print('k-means time: {0:.0f} s'.format(time.time() - end))

        return loss
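When data.shape[1] < 256, the branch above skips PCA whitening and only casts to float32 and L2-normalizes each row; preprocess_features and run_kmeans come from the surrounding project and are not reproduced here. A small self-contained sketch of that normalization step on random data:

# Sketch of the else-branch normalization above, run on random features.
import numpy as np

data = np.random.rand(1000, 128)            # N x dim feature matrix
xb = data.astype('float32')
row_sums = np.linalg.norm(xb, axis=1)       # per-row L2 norms
xb = xb / row_sums[:, np.newaxis]           # every row now has unit length

assert np.allclose(np.linalg.norm(xb, axis=1), 1.0, atol=1e-5)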
Code Example #7
 def __getitem__(self, index):
     data = np.load(self.img_path[index])
     data = data.astype(np.float32)
     if self.t:
         data = self.t(data)
     label = self.label_class[self.imgs[index].split('.')[0].split('_')[0]]
     return data, label
Code Example #8
File: dataset.py, Project: shenmayufei/code
    def __getitem__(self, idx):
        data = self.data[idx, :, :]
        data = data.astype(np.int32)
        img = Image.fromarray(data, mode='I')
        if self.training:
            degree = (random.random() - 0.5) * 10
            img = img.rotate(degree)

            if random.random() < 0.5:
                img = img.resize([32, 32], resample=Image.BICUBIC)  # 28->32
                # print('resize image size:', img.size)
                img = transform_crop(img)  # random crop 32->28
            else:
                img = transform_pad(img)  # padding 28->32
                # print('pad image size:', img.size)
                img = transform_crop(img)  # random crop 32->28
            # img.resize()

        # print('size 1:', img.size)
        img_array = np.array(img).reshape(28, 28, -1)
        # print('size 2:', img_array.shape)

        img_array = self.transform_default(img_array)  # HxWxC

        label = self.label[idx]
        # label = torch.LongTensor([label]).unsqueeze_(1)
        # label = torch.zeros(1, 10).scatter_(1,label,1).type_as(torch.LongTensor())
        # label = torch.LongTensor([label])  # just need class number
        return img_array, label
Code Example #9
 def double_check_size(self, data, gt_boxes, im_info):
     check = np.array(data.size(), dtype=np.int64)  # np.int was removed in newer NumPy versions
     scale_list = list(cfg.TRAIN.SCALES)
     scale_list.append(cfg.TRAIN.MAX_SIZE)
     scale_list = np.unique(np.array(scale_list))
     if not np.in1d(check, scale_list).any():
         import cv2
         data = data.numpy()
         gt_boxes = gt_boxes.numpy()
         im_info = im_info.numpy()
         data = data.astype(np.float32, copy=False)
         im_shape = data.shape
         im_size_min = np.min(im_shape[0:2])
         im_size_max = np.max(im_shape[0:2])
         target_size = min(cfg.TRAIN.SCALES)
         max_size = cfg.TRAIN.MAX_SIZE
         im_scale = float(target_size) / float(im_size_min)
         # Prevent the biggest axis from being more than MAX_SIZE
         if im_scale * im_size_max > max_size:
             im_scale = float(max_size) / float(im_size_max)
         data = cv2.resize(data,
                           None,
                           None,
                           fx=im_scale,
                           fy=im_scale,
                           interpolation=cv2.INTER_LINEAR)
         gt_boxes[:, 0:4] = gt_boxes[:, 0:4] * im_scale
         im_info = np.array(
             [[data.shape[0], data.shape[1], im_info[0][2] * im_scale]],
             dtype=np.float32)
         data = torch.from_numpy(data)
         gt_boxes = torch.from_numpy(gt_boxes)
         im_info = torch.from_numpy(im_info)
     return data, gt_boxes, im_info
Code Example #10
File: mel2samp.py, Project: alanderex/waveglow
def load_wav_to_torch(full_path):
    """
    Loads wav data into a torch tensor
    """
    sampling_rate, data = read(full_path)
    audio = torch.from_numpy(data.astype(np.float32))
    return audio, sampling_rate
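scipy.io.wavfile.read returns integer PCM samples for typical 16-bit wav files, so the tensor produced above stays in the raw sample range rather than [-1, 1]. The variant below additionally normalizes by the dtype's maximum magnitude; the normalization is an illustrative addition, not part of the original project.

# Illustrative variant with normalization (assumed behaviour, not from the project).
import numpy as np
import torch
from scipy.io.wavfile import read

def load_wav_to_torch_normalized(full_path):
    sampling_rate, data = read(full_path)
    audio = torch.from_numpy(data.astype(np.float32))
    if np.issubdtype(data.dtype, np.integer):
        audio = audio / abs(np.iinfo(data.dtype).min)   # e.g. 32768 for int16
    return audio, sampling_rate

# audio, sr = load_wav_to_torch_normalized('example.wav')  # path is a placeholder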
Code Example #11
def piano_roll(path, receptive_field):
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        song = pm.PrettyMIDI(midi_file=str(path))
    classes = [0, 3, 5, 7, 8, 9]
    limits = [[24, 96], [36, 84], [24, 96], [36, 84], [36, 84], [60, 96]]
    piano_rolls = [(_.get_piano_roll(fs=song.resolution), _.program)
                   for _ in song.instruments
                   if not _.is_drum and _.program // 8 in classes]
    length = np.amax([roll.shape[1] for roll, _ in piano_rolls])
    data_full = np.zeros(shape=(331, length))
    for roll, instrument in piano_rolls:
        i = classes.index(instrument // 8)
        sliced_roll = roll[limits[i][0]:limits[i][1]]
        data_full[limits[i][0]:limits[i][1]] += np.pad(
            sliced_roll, [(0, 0),
                          (0, length - sliced_roll.shape[1])], 'constant')
        data_full[325 + i] = 1
    if length < INPUT_LENGTH:
        data = np.pad(data_full, [(0, 0), (INPUT_LENGTH - length, 0)],
                      'constant')
    else:
        num = np.random.randint(0, length - INPUT_LENGTH + 1)
        data = data_full[:, num:INPUT_LENGTH + num]
    data[324] += 1 - data[:324].sum(axis=0)
    data = data > 0
    answer = np.transpose(data[:325, receptive_field + 1:], (1, 0))
    return data.astype(np.float32), answer.astype(np.float32)
Code Example #12
def _load_pkl_dict(path, key, idx=None, dtype='float32'):
    """Helper function to load pickled data.

    Parameters
    ----------
    path : :obj:`str`
        full file name including `.pkl` extension
    key : :obj:`str`
        data is returned from this key of the pickled dictionary
    idx : :obj:`int` or :obj:`NoneType`
        if :obj:`NoneType` return all data, else return data from this index
    dtype : :obj:`str`
        numpy data type of data

    Returns
    -------
    :obj:`list` of :obj:`numpy.ndarray` if :obj:`idx=None`
    :obj:`numpy.ndarray` if :obj:`idx=int`

    """
    with open(path, 'rb') as f:
        data_dict = pickle.load(f)

    if idx is None:
        samp = [data.astype(dtype) for data in data_dict[key]]
    else:
        samp = [data_dict[key][idx].astype(dtype)]

    return samp
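A short usage sketch for the helper above; the pickle file name, the 'images' key, and the array contents are placeholders created only for this demonstration.

# Usage sketch: write a small pickled dict, then read it back with the helper above.
import pickle
import numpy as np

with open('example_data.pkl', 'wb') as f:
    pickle.dump({'images': [np.zeros((4, 4)), np.ones((4, 4))]}, f)

all_arrays = _load_pkl_dict('example_data.pkl', 'images')         # list of float32 arrays
one_array = _load_pkl_dict('example_data.pkl', 'images', idx=1)   # single-element list
print(len(all_arrays), one_array[0].dtype)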
Code Example #13
File: FaceDataset.py, Project: xingmurong/hr-cnn
 def do_transforms(self, data):
     toPIL = vision.transforms.ToPILImage()
     toTensor = vision.transforms.ToTensor()
     data = toTensor(
         self.transform(toPIL(data.astype('uint8').transpose(
             (1, 2, 0))))) * 255
     return data.numpy()
Code Example #14
def ImageNetLoad2(root, word, train=True):
    """
    Keras Data Loader
    """
    random.seed(1)
    if root[-1] != '/':
        root += '/'
    files = []
    labels = []
    root += 'train/'
    docs = os.listdir(root)
    for doc in docs:
        tmp = glob.glob(root + doc + '/images/*.jpg')
        files += tmp.copy()
        labels += [word[doc]] * len(tmp)
        del tmp
    c = list(zip(files, labels))
    random.shuffle(c)
    files, labels = zip(*c)
    n = len(labels)
    data = []
    for i in range(len(files)):
        data.append(img.imread(files[i]))
    data = np.stack(data)
    labels = np.array(labels)
    print(data.shape)
    print(labels.shape)

    return data.astype('float32'), labels.astype('float32')
Code Example #15
def numpy_loader(path):
    data = np.load(path)
    data = data.astype("float32")
    if len(data.shape) > 2:
        data = np.swapaxes(data, 0, 1)
        data = np.swapaxes(data, 1, 2)
    return data
Code Example #16
 def __init__(self,
              root,
              train=True,
              transform=None,
              augment=False,
              dtype='float32'):
     self.root = os.path.expanduser(root)
     self.transform = transform
     self.train = train  # training set or test set
     if self.train:
         split = 'tr'
     else:
         split = 'te'
     matdata = sio.loadmat(root)
     R = matdata['X' + split][:, :32, :].transpose(2, 1, 0)
     G = matdata['X' + split][:, 32:64, :].transpose(2, 1, 0)
     B = matdata['X' + split][:, 64:, :].transpose(2, 1, 0)
     data = np.stack([R, G, B], axis=3)
     labels = [e[0] for e in matdata['Y' + split]]
     data = data.astype(dtype)
     labels = labels
     if self.train:
         self.train_data = data
         self.train_labels = labels
     else:
         self.test_data = data
         self.test_labels = labels
     self.augment = augment
Code Example #17
def preprocess(data):
    data = data.astype(float)
    rand = np.random.uniform(0, 1, data.shape)
    data += rand  #[0, 256]
    data = data / 128  #[0, 2]
    data -= 1  #[-1, 1]
    return data
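A quick numeric check of the rescaling above (an illustrative sketch; the input array is made up): adding uniform [0, 1) noise and mapping [0, 256) to [-1, 1) keeps every 8-bit value strictly inside [-1, 1).

# Uses the preprocess function defined directly above on a tiny made-up array.
import numpy as np

x = np.array([0, 127, 255], dtype=np.uint8)
y = preprocess(x)
print(y)                               # approximately [-1.0, 0.0, 1.0]
print(y.min() >= -1.0, y.max() < 1.0)  # True True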
Code Example #18
    def __getitem__(self, index):
        """Returns one data pair (image and caption)."""

        sample = self.samples[index]
        lbl = sample[2]
        vs = []
        if self.rotation:
            alpha = 2 * np.pi * np.random.rand()

        for i in range(self.seq_len):
            ffn = os.path.join(sample[0], '{:06d}'.format(sample[1] + i) + '.npz').replace('\\', '/')
            d = self.name2member[ffn]
            d = d.toarray()
            data = np.reshape(d, (self.w, self.h, self.d)).astype('float32')
            #data[data > 0 ] = 1.0
            #data *= 20
	
            #if self.rotation:
            #    data = scipy.ndimage.rotate(data, alpha, (0,1), reshape = False, order = 0, mode = 'nearest')
            
            p = np.random.uniform(0.7,1)
            r_idx = np.random.randint(6)
            mask = np.random.binomial(1, p, data.shape)
            #data = mask * data
            #if r_idx > 0:
            #    data[:,:,0:-r_idx] = data[:,:,r_idx:]
            #    data[:,:,-r_idx+1:] =  0
            data = data.astype('float32')
            vs.append(torch.from_numpy(data))

        return torch.stack(vs, dim = 0), torch.LongTensor([lbl])
Code Example #19
File: dataloader.py, Project: ynjang/Deep-BCI
    def __init__(self, data_path, dtype='train', data_dir='../data/'):
        all_data = open(data_dir + data_path).readlines()
        all_data = [elm[:-1] for elm in all_data]

        self.data = []
        self.y = []
        self.ndata = 0
        self.i = 0

        for j in range(len(all_data)):
            data = np.load(data_dir + all_data[j])
            if dtype == 'train':  # 501 to 1500 (for train)
                data = data[501:1501,
                            1:71]  # [time, dimension(channelxfrequency band)]
                self.ndata = 50
                self.i = 0
            elif dtype == 'test':  # 1501 to 2000 (for test)
                data = data[1501:2001, 1:71]
                self.ndata = 20
                self.i = 1501
            data = data.astype(float)
            for i in range(self.ndata):
                data_for_append = data[(i * 20):(i + 1) * 20, :]
                data_for_append.shape = 70, -1
                self.data.append(data_for_append)
                self.y.append(j)
Code Example #20
def full_piano_roll(path, receptive_field):
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        song = pm.PrettyMIDI(midi_file=str(path))
    piano_rolls = [(_.get_piano_roll(fs=song.resolution), _.program)
                   for _ in song.instruments if not _.is_drum]
    drum_rolls = [(_.get_piano_roll(fs=song.resolution), _.program)
                  for _ in song.instruments if _.is_drum]
    length = np.amax([roll.shape[1] for roll, _ in piano_rolls + drum_rolls])
    data = np.zeros(shape=(129 * 129 + 1, length))
    for roll, instrument in piano_rolls:
        data[instrument * 128:(instrument + 1) * 128] += np.pad(
            roll, [(0, 0), (0, length - roll.shape[1])], 'constant')
        data[128 * 129 + instrument] = 1
    for roll, instrument in drum_rolls:
        data[128 * 128:128 * 129] += np.pad(roll,
                                            [(0, 0),
                                             (0, length - roll.shape[1])],
                                            'constant')
        data[129 * 129 - 1] = 1
    if length >= MAX_LENGTH:
        num = np.random.randint(0, length - MAX_LENGTH + 1)
        data = data[:, num:num + MAX_LENGTH]
    data[129 * 129] += 1 - data.sum(axis=0)
    data = data > 0
    answer = np.transpose(data[:, receptive_field + 1:], (1, 0))
    return data.astype(np.float32), answer.astype(np.float32)
Code Example #21
File: dataset.py, Project: wdi1210/cnn_traffic_data
 def __getitem__(self, index):
     address, label = self.dataset[index][0], self.dataset[index][1]
     root = '/home/dell/TrafficDataset/ISCXIDS2012/labeled_flows_xml/data_hex/tag_new'
     f = open(root+'/'+address, 'r')
     data = json.load(f)
     for item in data:
         for i, char in enumerate(item):
             char = int(char, 16)
             item[i] = char
     data = np.array(data)
     if self.model_type == 'lstm':
         # import pdb; pdb.set_trace()  # leftover debug breakpoint; uncomment to inspect
         data = torch.from_numpy((data.astype('float32') / 255).reshape((10, 160)))
     else:
         data = torch.from_numpy((data.astype('float32')/255).reshape((1, 1600)))
     return data, label
Code Example #22
def load_wav_to_torch(full_path, target_sr=None):
    if full_path.endswith('wav') and sf is not None:
        sampling_rate, data = read(
            full_path)  # scipy only supports .wav but reads faster...
    else:
        data, sampling_rate = sf.read(full_path,
                                      always_2d=True)  # than soundfile.
        data = data[:, 0]  # sf.read returns (data, sample_rate); keep the first channel

    if np.issubdtype(data.dtype, np.integer):  # if audio data is type int
        max_mag = -np.iinfo(
            data.dtype).min  # maximum magnitude = min possible value of intXX
    else:  # if audio data is type fp32
        max_mag = max(np.amax(data), -np.amin(data))
        max_mag = (2**31) + 1 if max_mag > (2**15) else (
            (2**15) + 1 if max_mag > 1.01 else 1.0
        )  # data should be either 16-bit INT, 32-bit INT or [-1 to 1] float32

    data = torch.FloatTensor(data.astype(np.float32)) / max_mag

    if target_sr is not None and sampling_rate != target_sr:
        data = torch.from_numpy(
            librosa.core.resample(data.numpy(), sampling_rate, target_sr))
        sampling_rate = target_sr

    data = data.clamp(min=-0.9999, max=0.9999)

    return data, sampling_rate
Code Example #23
 def y(self) -> np.ndarray:
     data = CLoudBand.gt.read(self.pattern, self.uri)
     if data is None:
         return None
     # data = np.dstack((data, 1 - data))
     # data = np.moveaxis(data, -1, 0)
     data = np.expand_dims(data, 0)
     return data.astype(float)  # np.float was removed in newer NumPy versions
Code Example #24
def preprocess(data):
    punct = "/-'?!.,#$%\'()*+-/:;<=>@[\\]^_`{|}~`" + '""“”’' + '∞θ÷α•à−β∅³π‘₹´°£€\×™√²—–&'

    def clean_special_chars(text, punct):
        for p in punct:
            text = text.replace(p, ' ')
        return text.lower()

    remove_words = ['and', 'of']

    def remove_word(text, remove_words):
        text_temp = [i for i in text.split(' ') if i not in remove_words]
        return ' '.join(text_temp)

    data = data.astype(str).apply(lambda x: clean_special_chars(x, punct))
    data = data.astype(str).apply(lambda x: remove_word(x, remove_words))
    return data
Code Example #25
def TestForMirrorSymmetricTransform():
    file_path = '/home/lz/few-shot-complex-polar-sar-image-classification/dataset/11.jpg'
    data = np.array(Image.open(file_path))
    print(data.shape)
    data = MirrorSymmetricTransform(data, 1000)
    img = Image.fromarray(data.astype('uint8').transpose(
        (1, 2, 0))).convert('RGB')
    img.save('./test.jpg')
Code Example #26
File: dataset.py, Project: zccjjj/PointNet
    def __getitem__(self, index):
        path = r'E:\CG-pointcloud\master\modelnet40_ply_hdf5_2048\ply_data_train1.h5'  # raw string avoids backslash escapes
        f = h5py.File(path, 'r')
        data = f['data'][:]
        label = f['label'][:]

        point_set = torch.from_numpy(data.astype(np.float32))
        cls = torch.from_numpy(np.array([label]).astype(np.int64))

        return point_set, cls
Code Example #27
def preprocess(data):

    punct = "/-'?!.,#$%\'()*+-/:;<=>@[\\]^_`{|}~`" + '""“”’' + '∞θ÷α•à−β∅³π‘₹´°£€\×™√²—–&'

    def clean_special_chars(text, punct):
        for p in punct:
            text = text.replace(p, ' ')
        return text

    data = data.astype(str).apply(lambda x: clean_special_chars(x, punct))
    return data
Code Example #28
File: mmnist.py, Project: kkew3/dolphins-mc
 def sample_img(data, n: int, n_total_keys: int) -> np.ndarray:
     N, T, H, W = data.shape
     n_samples = min(1 + int(MAX_MEMORY_BYTES /
                             (n_total_keys * H * W * 8)), n)
     if n_samples < n:
         logging.warning('reduced n_samples from %d to %d '
                         'due to memory limit',
                         n, n_samples)
     ind = np.random.permutation(N * T)[:n_samples]
     data = np.copy(data.reshape((N * T, H * W))[ind])
     return data.astype(np.float64) / 255
Code Example #29
 def forward(ctx, xyz, normal):
     xyz = xyz.permute(0, 2, 1)
     normal = normal.permute(0, 2, 1)
     data = ((xyz + 2) * 4096).cpu().numpy()
     data = data.astype(dtype=np.uint32)
     assert data.shape[-1] == 3
     z_order_code = torch.from_numpy(z_order_encode(data)).cuda()
     _, idx = torch.sort(z_order_code, dim=1)
     batch_idx = torch.arange(xyz.shape[0]).reshape(xyz.shape[0], 1, 1)
     a, b = xyz[batch_idx, idx].squeeze(2), normal[batch_idx,
                                                   idx].squeeze(2)
     return a, b
Code Example #30
def preprocess(data):
    '''
    Credit goes to https://www.kaggle.com/gpreda/jigsaw-fast-compact-solution
    '''
    punct = "/-'?!.,#$%\'()*+-/:;<=>@[\\]^_`{|}~`" + '""“”’' + '∞θ÷α•à−β∅³π‘₹´°£€\×™√²—–&'
    def clean_special_chars(text, punct):
        for p in punct:
            text = text.replace(p, ' ')
        return text

    data = data.astype(str).apply(lambda x: clean_special_chars(x, punct))
    return data