def loadValidationData(percentage=0.3, size=224):
    '''
    Load the tail of every brand folder as the validation set.

    The label file '../json/label.dat' maps brands to class indices, one
    "<idx> <brand>" pair per line (blank lines are ignored). For each brand,
    the images under '../train/<brand>' are listed and the entries from
    position int(total * (1 - percentage)) onwards are loaded.

    percentage: fraction of each brand folder (taken from the end of the
                directory listing) to load. Values outside [0, 1] are
                clamped to "nothing" / "everything".
    size: image will be resized to size*size
          (passed straight to preprocess.preprocess).

    return data, label
    data:  Variable(torch.FloatTensor) (num of images, 3, size, size)
    label: torch.LongTensor (num of images)
    '''
    # constant
    labelPath = '../json/label.dat'
    imgPath = '../train/'

    # make label dict; `with` guarantees the file is closed even when a
    # malformed line makes the parsing below raise
    labelDict = dict()
    with open(labelPath, 'r') as labelFile:
        for line in labelFile:
            line = line.strip()
            if not line:
                # tolerate blank / trailing empty lines
                continue
            idx, brand = line.split(" ")
            labelDict[brand] = int(idx)

    # List every brand folder exactly once and remember which files were
    # selected, so the tensor allocated below always matches the number of
    # images that are actually loaded.
    # NOTE: os.listdir returns entries in arbitrary order; the split relies
    # on that order being the same whenever the folder is listed again.
    selected = []
    for brand in labelDict:
        brandPath = imgPath + brand
        allImg = os.listdir(brandPath)
        # index of the first validation image inside this brand
        start = max(int(len(allImg) * (1 - percentage)), 0)
        for img in allImg[start:]:
            selected.append((brandPath + "/" + img, labelDict[brand]))

    valsize = len(selected)
    valx = torch.empty(valsize, 3, size, size)
    valy = []

    # read image. brand by brand
    count = 0
    for imgFile, brandIdx in selected:
        imgTmp = preprocess.readImage(imgFile)
        imgTmp2 = preprocess.preprocess(imgTmp, size)
        # assumes preprocess returns a (1, size, size, 3) numpy array;
        # permute moves the channel axis to position 1 -- TODO confirm
        imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
            torch.FloatTensor)
        valy.append(brandIdx)
        valx[count:(count + 1)] = imgout
        count += 1

    # convert to Tensor
    label = torch.LongTensor(valy)
    data = Variable(valx)
    print(count)
    print(label.shape)
    print(data.shape)

    # return
    return data, label
def loadImagesList(filelist, size):
    '''
    Read every image named in filelist into one batch.

    filelist: list [str, str, ...]. Each entry is handed to
              preprocess.readImage unchanged.
    size: forwarded to preprocess.preprocess; the batch is allocated as
          (len(filelist), 3, size, size).

    return: Variable(torch.FloatTensor) (len(filelist), 3, size, size)
    '''
    total = len(filelist)
    batch = torch.empty(total, 3, size, size)

    loaded = 0
    for slot, path in enumerate(filelist):
        raw = preprocess.readImage(path)
        resized = preprocess.preprocess(raw, size)
        # assumes preprocess returns a channel-last 4-D numpy array -- TODO confirm
        tensor = torch.from_numpy(resized).permute(0, 3, 1, 2)
        batch[slot:slot + 1] = tensor.type(torch.FloatTensor)
        loaded = slot + 1

    data = Variable(batch)
    print(loaded)
    print(data.shape)
    return data
def loadValDataFromFile(valFile, size=224, percentage=1.0):
    '''
    Load validation data from a txt file.

    The format of valFile: one image per line, three fields separated by a
    single space: "spu_id img_path category_id". Blank lines are ignored.

    percentage: only take the first percentage*100% of the images listed in
                valFile as validation set (capped at the whole file).
                percentage = 0.02 is usually used as validation set in
                learning curve.
    size: image will be resized to size*size
          (passed straight to preprocess.preprocess).

    return data, label
    data:  Variable(torch.FloatTensor) (num of images, 3, size, size)
    label: torch.LongTensor (num of images)
    '''
    # Read the listing once; `with` closes the file even if loading fails.
    with open(valFile, 'r') as val:
        records = [line.strip() for line in val]
    records = [rec for rec in records if rec]

    # Never allocate more rows than there are images to fill them with.
    valsize = min(int(len(records) * percentage), len(records))

    valy = []
    valx = torch.empty(valsize, 3, size, size)
    count = 0
    for rec in records[:valsize]:
        spuid, img, cat = rec.split(" ")
        imgTmp = preprocess.readImage(img)
        imgTmp2 = preprocess.preprocess(imgTmp, size)
        # assumes preprocess returns a (1, size, size, 3) numpy array;
        # permute moves the channel axis to position 1 -- TODO confirm
        imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
            torch.FloatTensor)
        valy.append(int(cat))
        valx[count:(count + 1)] = imgout
        count += 1

    label = torch.LongTensor(valy)
    data = Variable(valx)
    print(count)
    print(label.shape)
    print(data.shape)

    # return
    return data, label
def loadTrainDataFromFile(trainFile, size=224, percentage=1):
    '''
    Load training data from a txt file.

    The format of trainFile: one image per line, three fields separated by a
    single space: "spu_id img_path category_id". Blank lines are ignored.

    percentage: only take the first percentage*100% of the images listed in
                trainFile as training set (capped at the whole file).
                percentage = 0.1 is usually used for debug.
    size: images will be resized to size * size
          (passed straight to preprocess.preprocess).

    return: data, label
    data:  torch.FloatTensor (num of images, 3, size, size)
    label: torch.LongTensor (num of images)
    '''
    # Read the listing once; `with` closes the file even if loading fails.
    with open(trainFile, 'r') as train:
        records = [line.strip() for line in train]
    records = [rec for rec in records if rec]

    # Never allocate more rows than there are images to fill them with.
    trainsize = min(int(len(records) * percentage), len(records))

    trainy = []
    trainx = torch.empty(trainsize, 3, size, size)
    count = 0
    for rec in records[:trainsize]:
        idx, img, cat = rec.split(" ")
        imgTmp = preprocess.readImage(img)
        imgTmp2 = preprocess.preprocess(imgTmp, size)
        # assumes preprocess returns a (1, size, size, 3) numpy array;
        # permute moves the channel axis to position 1 -- TODO confirm
        imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
            torch.FloatTensor)
        trainy.append(int(cat))
        trainx[count:(count + 1)] = imgout
        count += 1

    label = torch.LongTensor(trainy)
    print(count)
    print(label.shape)
    print(trainx.shape)
    return trainx, label
def loadTrainData(percentage=1.0, size=224):
    '''
    Load the head of every brand folder as the training set.

    The label file '../json/label.dat' maps brands to class indices, one
    "<idx> <brand>" pair per line (blank lines are ignored). For each brand,
    the images under '../train/<brand>' are listed and the first
    int(total * percentage) entries are loaded.

    percentage: only load the first percentage*100% of each brand folder as
                training set. Values outside [0, 1] are clamped to
                "nothing" / "everything".
    size: images will be resized to size * size
          (passed straight to preprocess.preprocess).

    return: data, label
    data:  torch.FloatTensor (num of images, 3, size, size)
    label: torch.LongTensor (num of images)
    '''
    # constant
    labelPath = '../json/label.dat'
    imgPath = '../train/'

    # make label dict; `with` guarantees the file is closed even when a
    # malformed line makes the parsing below raise
    labelDict = dict()
    with open(labelPath, 'r') as labelFile:
        for line in labelFile:
            line = line.strip()
            if not line:
                # tolerate blank / trailing empty lines
                continue
            idx, brand = line.split(" ")
            labelDict[brand] = int(idx)

    # List every brand folder exactly once and keep the listing, so the
    # tensor allocated below always matches the number of images that are
    # actually loaded.
    # NOTE: os.listdir returns entries in arbitrary order; the split relies
    # on that order being the same whenever the folder is listed again.
    listings = []
    trainsize = 0
    for brand in labelDict:
        brandPath = imgPath + brand
        allImg = os.listdir(brandPath)
        totalImg = len(allImg)
        # number of leading images taken from this brand
        num = min(max(int(totalImg * percentage), 0), totalImg)
        trainsize += num
        listings.append((brand, brandPath, allImg, num))

    trainx = torch.empty(trainsize, 3, size, size)
    trainy = []

    # read image. brand by brand
    count = 0
    for brand, brandPath, allImg, num in listings:
        for idx, img in enumerate(allImg):
            if idx == 1000:
                # Debug aid: show which file sits at position 1000 of the
                # listing. The original author noted the expected output as:
                # d792d0eba6b447049a28686b9298915d.jpg
                # 6df9e31532af4810a02d5abb9548f1f1.jpg
                print(img)
            if idx < num:
                imgTmp = preprocess.readImage(brandPath + "/" + img)
                imgTmp2 = preprocess.preprocess(imgTmp, size)
                # assumes preprocess returns a (1, size, size, 3) numpy
                # array; permute moves the channel axis to position 1
                # -- TODO confirm
                imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
                    torch.FloatTensor)
                trainy.append(labelDict[brand])
                trainx[count:(count + 1)] = imgout
                count += 1

    # convert to Tensor
    label = torch.LongTensor(trainy)
    print(count)
    print(label.shape)
    print(trainx.shape)

    # return
    return trainx, label