def _init_B(self):
    """Initialize the binary code matrices BI (images) and BS (sketches).

    If both cached ``.npy`` files exist under ``self.folder_saving`` they
    are loaded; otherwise fresh random codes in {-1, +1} of shape
    (n_images, m) / (n_sketches, m) are drawn.
    """
    folder_saving = self.folder_saving
    m = self.m
    # NOTE: these are file paths (.npy files); the folder_* naming is kept
    # from the original code for consistency with its log message wording.
    folder_bi = join(folder_saving, npfn('bi'))
    folder_bs = join(folder_saving, npfn('bs'))
    if os.path.exists(folder_bi) and os.path.exists(folder_bs):
        self._print("Init BI matrix from {} and BS matrix from {}.".format(
            folder_bi, folder_bs))
        # Reuse the paths computed above instead of rebuilding them.
        self.BI = np.load(folder_bi)
        self.BS = np.load(folder_bs)
    else:
        self._print("Reinit BI and BS matrix!")
        # randint(0, 2) yields {0, 1}; *2-1 maps that to {-1, +1}.
        self.BI = np.random.randint(0, 2, [self.lens[IM], m]) * 2 - 1
        self.BS = np.random.randint(0, 2, [self.lens[SK], m]) * 2 - 1
def _init_D(self):
    """Load the projection matrix D from disk, or randomize it.

    D has shape (d, m) and maps semantic vectors to code space; a random
    init is acceptable because D is re-estimated from BI and BS.
    """
    d_file = join(self.folder_saving, npfn('d'))
    if not os.path.exists(d_file):
        self._print(
            "Reinit D matrix. It is OK since D can be inferred from BI and BS."
        )
        self.D = np.random.rand(self.d(), self.m)
    else:
        self._print("Init D matrix from {}".format(d_file))
        self.D = np.load(d_file)
def _create_im2(folder_im, folder_imsk, folder_im2):
    """Build per-class ``*_im2.npy`` stacks of resized images.

    For each class folder under ``folder_im``, keep only the images whose
    basename has a matching sketch-token file under ``folder_imsk``, resize
    them to IMAGE_SIZE x IMAGE_SIZE, and save the stack into
    ``folder_im2`` as a uint8 array.
    """
    if not os.path.exists(folder_im2):
        os.mkdir(folder_im2)
    for cls in os.listdir(folder_im):
        # Basenames (extension stripped) that have a sketch token available.
        stems_with_token = {
            f.split('.')[0]
            for f in os.listdir(join(folder_imsk, cls))
        }
        kept = []
        for fname in os.listdir(join(folder_im, cls)):
            if fname.split('.')[0] in stems_with_token:
                raw = cv2.imread(join(folder_im, cls, fname))
                kept.append(cv2.resize(raw, (IMAGE_SIZE, IMAGE_SIZE)))
        print(cls, len(stems_with_token), len(kept))
        np.save(join(folder_im2, cls + npfn('_im2')),
                np.asarray(kept, dtype=np.uint8))
def _try_save_ims(self, folder_nps, name, data_of_name):
    # Cache the per-class arrays of `data_of_name` into `folder_nps` (when
    # given) so later runs can np.load the .npy files instead of re-reading
    # raw images. Each file is written only if it does not already exist.
    if folder_nps:
        if not os.path.exists(join(folder_nps, npfn(name + '_imsk'))):
            np.save(join(folder_nps, npfn(name + '_imsk')),
                    data_of_name[IMSK])
        if not os.path.exists(join(folder_nps, npfn(name + '_sk'))):
            np.save(join(folder_nps, npfn(name + '_sk')), data_of_name[SK])
        if not os.path.exists(join(folder_nps, npfn(name + '_im2'))):
            np.save(join(folder_nps, npfn(name + '_im2')), data_of_name[IM])
    # Invariant: every image must have exactly one sketch-token counterpart.
    # NOTE(review): `assert` is stripped under `python -O` — confirm this
    # check is not relied on in optimized runs.
    assert len(data_of_name[IM]) == len(
        data_of_name[IMSK]
    ), 'Sketch token and images must satisfy one-to-one \
correspondence. (Error while disposing class {})'.format(name)
def __init__(self,
             folder_saving,
             path_semantic,
             folder_sk=None,
             folder_im=None,
             folder_imsk=None,
             clss=None,
             normalize01=False,
             doaug=True,
             folder_nps=None,
             m=300,
             logger=None):
    """
    Attributes:
        BS, ns * m
        BI, ni * m
        D, d * m
        vec_bs, ns * d
        vec_bi, ni * d
        Ws ~ ni * ns, implemented with memory mapping.

    :param folder_sk: sketch folder
    :param folder_im: image folder
    :param folder_imsk: image's sketch token folder. ATTENTION: this folder
        contains sketch tokens of the corresponding images, not images!
    :param clss: classes to load
    :param normalize01: whether normalize data to 0-1
    :param doaug: whether do data augmentation
    :param folder_nps: the folder saves npy files. This allow fewer inodes
        to save the datasets(the server does not allow too many inodes
        allocated). The folder should contain classname1_sk.npy,
        classname1_imsk.npy, classname1_im.npy, classname2_sk.npy,
        classname2_imsk.npy, classname2_im.npy, ...
        1. If folder_nps is None, folder_sk and folder_imsk must be
           provided.
        2. If folder_nps is not None but no files exist in the folder,
           folder_sk and folder_im must be provided, and such files would
           be created in folder_nps.
        3. If folder_nps is not None and files exist in the folder, load
           the files instead of those in folder_sk and folder_imsk for
           training.
    :param path_semantic: path of the semantic vector(xxx.pkl). It should
        be a dict: {class_name1: [b1, b2, ...], class_name2: [b1, b2, ...]}
    :param m: number of binary bits
    :param folder_saving: folder to save/load binary codes
    :param logger: logger to debug.
    """
    super(DSH_dataloader, self).__init__()
    # Per-class triples of (sketches, sketch tokens, images).
    self.idx2skim_pair = []
    self.logger = logger
    self.normalize01 = normalize01
    self.doaug = doaug
    self._build_trans()
    # Bidirectional class-name <-> integer-index mapping.
    self.cls2idx = {}
    self.idx2cls = []
    self.semantics = []
    # Running totals of items per modality, indexed by SK/IMSK/IM.
    self.lens = [0, 0, 0]
    self.folder_saving = folder_saving
    self.clss = clss
    self.m = m
    # Per-item semantic vectors, one row per image / per sketch.
    self.vec_bi = []
    self.vec_bs = []
    # Per-item integer class labels for images / sketches.
    self.label_all_i = []
    self.label_all_s = []
    folders = [folder_sk, folder_imsk, folder_im]
    if not os.path.exists(folder_saving):
        os.mkdir(folder_saving)
    # NOTE(review): pickle.loads on this file assumes it is trusted input.
    semantics = pickle.load(open(path_semantic, 'rb'))
    if folder_nps and not os.path.exists(folder_nps):
        os.mkdir(folder_nps)
    for name in clss:
        self.semantics.append(semantics[name])
        # Raw-data folders are only usable when all three exist
        # (str() guards against None entries).
        if all([os.path.exists(str(fd)) for fd in folders]):
            sks_folder = join(folders[SK], name)
            imsks_folder = join(folders[IMSK], name)
            ims_folder = join(folders[IM], name)
        # print(folder_nps, name, join(folder_nps, npfn(name + '_imsk')), os.path.exists(join(folder_nps, npfn(name + '_imsk'))))
        if folder_nps and os.path.exists(
                join(folder_nps, npfn(name + '_imsk'))):
            # Cached path: load the pre-built arrays (order: SK, IMSK, IM).
            data_of_name = [
                np.load(join(folder_nps, npfn(name + '_sk'))),
                np.load(join(folder_nps, npfn(name + '_imsk'))),
                np.load(join(folder_nps, npfn(name + '_im2')))
            ]
            # print(data_of_name[SK].shape, data_of_name[IMSK].shape, data_of_name[IM].shape)
        else:
            # Slow path: read raw images/sketch tokens from disk.
            data_of_name = self._get_data_from_ims(
                sks_folder=sks_folder,
                imsks_folder=imsks_folder,
                ims_folder=ims_folder)
        data_of_name = self._process(data_of_name)
        # Cache to folder_nps when possible (no-op if files already exist).
        self._try_save_ims(folder_nps=folder_nps,
                           name=name,
                           data_of_name=data_of_name)
        for i in range(3):
            self.lens[i] += len(data_of_name[i])
        # Replicate the class semantic vector once per image / sketch.
        self.vec_bi += [
            semantics[name] for _ in range(len(data_of_name[IM]))
        ]
        self.vec_bs += [
            semantics[name] for _ in range(len(data_of_name[SK]))
        ]
        self.idx2skim_pair.append(data_of_name)
        self.cls2idx[name] = len(self.idx2cls)
        self.idx2cls.append(name)
        self.label_all_i.append(
            np.zeros(len(data_of_name[IM])) + self.cls2idx[name])
        self.label_all_s.append(
            np.zeros(len(data_of_name[SK])) + self.cls2idx[name])
    self.semantics = np.asarray(self.semantics)
    self._print(
        'Dataset loaded from folder_sk:{}, folder_imsk:{}, folder_im:{}, folder_nps:{}, sk_len:{},\
imsk_len:{}, im_len:{}'.format(folder_sk, folder_imsk, folder_im,
                               folder_nps, self.lens[SK],
                               self.lens[IMSK], self.lens[IM]))
    self.vec_bs = np.asarray(self.vec_bs)
    self.vec_bi = np.asarray(self.vec_bi)
    # Flatten the per-class label chunks into single 1-D label arrays.
    self.label_all_i = np.hstack(self.label_all_i)
    self.label_all_s = np.hstack(self.label_all_s)
    self._init_W(label_all_i=self.label_all_i,
                 label_all_s=self.label_all_s)
    self._init_B()
    self._init_D()
    self._print('Dataset init done.')
def save_params(self):
    """Persist the learned matrices (BI, BS, D) under ``folder_saving``."""
    target = self.folder_saving
    for stem, matrix in (('bi', self.BI), ('bs', self.BS), ('d', self.D)):
        np.save(join(target, npfn(stem)), matrix)
def __init__(self,
             folder_sk,
             folder_imsk,
             clss,
             normalize01=False,
             doaug=True,
             exp3ch=True,
             folder_nps=None,
             dis2sim=10):
    """
    :param folder_sk: sketch folder
    :param folder_imsk: image's sketch token folder. ATTENTION: this folder
        contains sketch tokens of the corresponding images, not images!
    :param clss: classes to load
    :param normalize01: whether normalize data to 0-1
    :param doaug: whether do data augmentation
    :param exp3ch: whether force the sketches to expand to 3 channels
    :param folder_nps: the folder saves npy files. This allow fewer inodes
        to save the datasets(the server does not allow too many inodes
        allocated). The folder should contain classname1_sk.npy,
        classname1_imsk.npy, classname2_sk.npy, classname2_imsk.npy, ...
        1. If folder_nps is None, folder_sk and folder_imsk must be
           provided.
        2. If folder_nps is not None but no files exist in the folder,
           folder_sk and folder_im must be provided, and such files would
           be created in folder_nps.
        3. If folder_nps is not None and files exist in the folder, load
           the files instead of those in folder_sk and folder_imsk for
           training.
    :param dis2sim: The ratio of dissimilar pairs to similar pairs.
    """
    super(D3Shape_dataloader, self).__init__()
    # Per-class pairs of (sketches, sketch tokens) as PIL Images.
    self.idx2skim_pair = []
    self.normalize01 = normalize01
    self.dis2sim = dis2sim
    self.doaug = doaug
    self.exp3ch = exp3ch
    self._build_trans()
    # Bidirectional class-name <-> integer-index mapping.
    self.cls2idx = {}
    self.idx2cls = []
    # Running totals per modality, indexed by SK/IMSK.
    self.lens = [0, 0]
    if folder_nps and not os.path.exists(folder_nps):
        os.mkdir(folder_nps)
    for name in clss:
        if os.path.exists(folder_sk) and os.path.exists(folder_imsk):
            sks_folder = join(folder_sk, name)
            imsks_folder = join(folder_imsk, name)
        if folder_nps and os.path.exists(
                join(folder_nps, npfn(name + '_imsk'))):
            # Cached path: load pre-built arrays (order: SK, IMSK).
            # NOTE(review): the '_sk' cache is never written below (its
            # np.save is commented out), so this load presumes the sk files
            # were produced elsewhere — confirm.
            to_app = [
                np.load(join(folder_nps, npfn(name + '_sk'))),
                np.load(join(folder_nps, npfn(name + '_imsk')))
            ]
            # print(to_app[SK].shape, to_app[IMSK].shape)
        else:
            # Slow path: read and preprocess every .jpg/.png in the class
            # folders.
            to_app = [[
                self._prep_img(join(sks_folder, path))
                for path in os.listdir(sks_folder)
                if path.endswith('.jpg') or path.endswith('.png')
            ], [
                self._prep_img(join(imsks_folder, path))
                for path in os.listdir(imsks_folder)
                if path.endswith('.jpg') or path.endswith('.png')
            ]]
        # asarray is a no-op for the cached (already-ndarray) branch.
        to_app[SK] = np.asarray(to_app[SK], dtype=np.uint8)
        to_app[IMSK] = np.asarray(to_app[IMSK], dtype=np.uint8)
        if folder_nps and not os.path.exists(
                join(folder_nps, npfn(name + '_imsk'))):
            # np.save(join(folder_nps, npfn(name + '_sk')), to_app[SK])
            np.save(join(folder_nps, npfn(name + '_imsk')), to_app[IMSK])
        # Convert to PIL Images for the torchvision-style transforms.
        to_app[SK] = [Image.fromarray(img) for img in to_app[SK]]
        to_app[IMSK] = [Image.fromarray(img) for img in to_app[IMSK]]
        self.idx2skim_pair.append(to_app)
        self.cls2idx[name] = len(self.idx2cls)
        self.idx2cls.append(name)
        self.lens[SK] += len(to_app[SK])
        self.lens[IMSK] += len(to_app[IMSK])
    print(
        'Dataset loaded from folder_sk:{}, folder_imsk:{}, folder_nps:{}, sk_len:{}, imsk_len:{}'
        .format(folder_sk, folder_imsk, folder_nps, self.lens[SK],
                self.lens[IMSK]))
    self.clss = clss