def load_dataset(opt):
    """Loads the input datasets."""
    print('Reading dataset ', opt.dataset)
    # Standard ImageNet-style eval preprocessing, shared by both splits
    # (Compose objects are stateless, so one instance is safe to reuse).
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])
    trainset = datasets.Fashion200k(
        path=opt.dataset_path, split='train', transform=transform)
    testset = datasets.Fashion200k(
        path=opt.dataset_path, split='test', transform=transform)
    print('trainset size:', len(trainset))
    print('testset size:', len(testset))
    return trainset, testset
def GetValuestestloaded():
    """Evaluates the pretrained TIRG model on Fashion200k, with and without
    the precomputed Beta projection loaded from BetatestLoaded.txt.

    For each split (train, test) prints the Beta-normalized retrieval
    metrics followed by the as-published ("As PaPer") metrics.
    """
    # NOTE(review): hard-coded backslash separator is Windows-only —
    # confirm this script never runs on POSIX.
    with open(Path1 + "\\BetatestLoaded.txt", 'rb') as fp:
        BetaNormalize = pickle.load(fp)
    # One shared preprocessing pipeline (the original built two identical
    # Compose objects).
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])
    trainset = datasets.Fashion200k(path=Path1, split='train',
                                    transform=transform)
    testset = datasets.Fashion200k(path=Path1, split='test',
                                   transform=transform)
    # get_all_texts() already yields str (the original called .encode() on
    # each element), so the encode().decode('utf-8') round trip was a no-op
    # and is dropped.
    trig = img_text_composition_models.TIRG(list(trainset.get_all_texts()),
                                            512)
    trig.load_state_dict(
        torch.load(Path1 + r'\fashion200k.tirg.iter160k.pth',
                   map_location=torch.device('cpu'))['model_state_dict'])
    # argparse.ArgumentParser is (ab)used as a plain attribute container;
    # nothing is ever parsed — the attributes assigned below are what the
    # test helpers actually read.
    opt = argparse.ArgumentParser()
    opt.add_argument('--batch_size', type=int, default=2)
    opt.add_argument('--dataset', type=str, default='fashion200k')
    opt.batch_size = 1
    opt.dataset = 'fashion200k'
    for name, dataset in [('train', trainset), ('test', testset)]:
        betaNor = test_retrieval.testLoadedBeta(opt, trig, dataset,
                                                BetaNormalize)
        print(name, ' BetaNormalized: ', betaNor)
        asbook = test_retrieval.testLoaded(opt, trig, dataset)
        print(name, ' As PaPer: ', asbook)
def ab_Mgetvaluesfilesaved(option):
    """Evaluates the pretrained TIRG model against saved feature values.

    Args:
        option: forwarded unchanged to ab_all_neural_testLoaded, which
            selects the evaluation variant (semantics defined there).

    Prints, for the test split: the as-published metrics, the
    model-generated metrics, and the euclidean-model metrics.
    """
    # One shared preprocessing pipeline (the original built two identical
    # Compose objects).
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])
    trainset = datasets.Fashion200k(path=Path1, split='train',
                                    transform=transform)
    testset = datasets.Fashion200k(path=Path1, split='test',
                                   transform=transform)
    # get_all_texts() already yields str, so the former
    # encode().decode('utf-8') round trip was a no-op and is dropped.
    trig = img_text_composition_models.TIRG(list(trainset.get_all_texts()),
                                            512)
    # NOTE(review): Windows-style path — confirm this only runs on Windows.
    trig.load_state_dict(
        torch.load(Path1 + r'\fashion200k.tirg.iter160k.pth',
                   map_location=torch.device('cpu'))['model_state_dict'])
    # argparse.ArgumentParser used as a plain attribute bag; nothing is
    # parsed — the attributes assigned below are what the helpers read.
    opt = argparse.ArgumentParser()
    opt.add_argument('--batch_size', type=int, default=2)
    opt.add_argument('--dataset', type=str, default='fashion200k')
    opt.batch_size = 1
    opt.dataset = 'fashion200k'
    # Only the test split is evaluated; add ('train', trainset) to this
    # list to evaluate the training split as well.
    for name, dataset in [('test', testset)]:
        asbook1, model, euc_model = ab_all_neural_testLoaded(
            opt, trig, dataset, option)
        print(name, ' Loaded As PaPer: ', asbook1, '\n model generated ',
              model, '\n euc model', euc_model)
def testvaluessame():
    """Sanity check: composing the same (images, texts) batch twice through
    the TIRG model should print two identical feature tensors."""
    # Eval-time preprocessing used both by the dataset and the manual loads.
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])
    train = datasets.Fashion200k(path=Path1, split='train',
                                 transform=transform)
    trig = img_text_composition_models.TIRG(
        [t.encode().decode('utf-8') for t in train.get_all_texts()], 512)
    trig.load_state_dict(
        torch.load(Path1 + r'\fashion200k.tirg.iter160k.pth',
                   map_location=torch.device('cpu'))['model_state_dict'])
    trig.eval()
    query = 'women/tops/blouses/91422080/91422080_0.jpeg'
    qttext = 'replace sunrise with pleat-neck'
    target = 'women/tops/sleeveless_and_tank_tops/90068628/90068628_0.jpeg'
    # Same modifier text for both images in the batch.
    text = [qttext, qttext]

    def _load(rel_path):
        # Load one image, apply the eval transform, and add a batch dim.
        image = Image.open(Path1 + '/' + rel_path).convert('RGB')
        return transform(image).unsqueeze_(0)

    images = torch.cat([_load(query), _load(target)], dim=0)
    trigdataQ = trig.compose_img_text(images, text)
    trigdataQ1 = trig.compose_img_text(images, text)
    print('...........')
    print(trigdataQ)
    print(trigdataQ1)
def savesourcevalues():
    """Computes source-image features for the 172K training set with the
    pretrained TIRG model and saves them under dataset172.

    Side effects: writes feature files via
    datasets.Features172K().SavetoFilesImageSource and prints a completion
    message.
    """
    # One shared preprocessing pipeline (the original built two identical
    # Compose objects).
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])
    train = datasets.Fashion200k(path=Path1, split='train',
                                 transform=transform)
    # NOTE(review): `test` is constructed but never used below; kept in
    # case dataset construction has needed side effects — confirm and
    # remove if not.
    test = datasets.Fashion200k(path=Path1, split='test',
                                transform=transform)
    # get_all_texts() already yields str, so the former
    # encode().decode('utf-8') round trip was a no-op and is dropped.
    trig = img_text_composition_models.TIRG(list(train.get_all_texts()),
                                            512)
    trig.load_state_dict(
        torch.load(Path1 + r'\fashion200k.tirg.iter160k.pth',
                   map_location=torch.device('cpu'))['model_state_dict'])
    # argparse.ArgumentParser used as a plain attribute bag; nothing is
    # parsed — only the attributes assigned below are read downstream.
    opt = argparse.ArgumentParser()
    opt.add_argument('--batch_size', type=int, default=2)
    opt.add_argument('--dataset', type=str, default='fashion200k')
    opt.batch_size = 1
    opt.dataset = 'fashion200k'
    datasets.Features172K().SavetoFilesImageSource(Path1 + r'/dataset172',
                                                   trig, train, opt)
    print('172 Finished')
def load_dataset(opt):
    """Loads the input datasets."""
    print('Reading dataset: ', opt.dataset)
    if opt.dataset == 'fashion200k':
        trainset = datasets.Fashion200k(
            path=opt.dataset_path,
            split='train',
            transform=torchvision.transforms.Compose([
                # Resize with a single int scales the shorter side to that
                # size, keeping the aspect ratio; a tuple resizes to that
                # exact size instead.
                torchvision.transforms.Resize(224),
                # CenterCrop(x) crops an x-sized patch from the image center.
                torchvision.transforms.CenterCrop(224),
                # ToTensor converts a PIL image (H, W, C) to (C, H, W) and
                # divides by 255, so every element lies in [0, 1].
                # (tensor.permute(1, 2, 0) converts back to (H, W, C).)
                torchvision.transforms.ToTensor(),
                # First list: per-channel mean; second list: per-channel std.
                # Normalize --> output[channel] = (input[channel] - mean[channel]) / std[channel]
                torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                                 [0.229, 0.224, 0.225]),
            ]))
        testset = datasets.Fashion200k(
            path=opt.dataset_path,
            split='test',
            transform=torchvision.transforms.Compose([
                torchvision.transforms.Resize(224),
                torchvision.transforms.CenterCrop(224),
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                                 [0.229, 0.224, 0.225])
            ]))
    else:
        # Unknown dataset name: report and abort the process.
        print('Invalid dataset: ', opt.dataset)
        sys.exit()
    print('trainset size: ', str(len(trainset)))
    print('testset size: ', str(len(testset)))
    return trainset, testset
def load_dataset(opt):
    """Loads the input datasets."""
    print('Reading dataset ', opt.dataset)
    # Every dataset variant uses the same eval-time preprocessing, so build
    # the pipeline once and pass it to each constructor.
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])
    if opt.dataset == 'fashion200k':
        trainset = datasets.Fashion200k(
            path=opt.dataset_path, split='train', transform=transform)
        testset = datasets.Fashion200k(
            path=opt.dataset_path, split='test', transform=transform)
    elif opt.dataset == 'mitstates':
        trainset = datasets.MITStates(
            path=opt.dataset_path, split='train', transform=transform)
        testset = datasets.MITStates(
            path=opt.dataset_path, split='test', transform=transform)
    elif opt.dataset == 'fashionIQ':
        # FashionIQ names its held-out split 'val' rather than 'test'.
        trainset = datasets.FashionIQ(
            path=opt.dataset_path,
            cat_type=opt.category_to_train,
            split='train',
            transform=transform)
        testset = datasets.FashionIQ(
            path=opt.dataset_path,
            cat_type=opt.category_to_train,
            split='val',
            transform=transform)
    else:
        print('Invalid dataset', opt.dataset)
        sys.exit()
    print('trainset size:', len(trainset))
    print('testset size:', len(testset))
    return trainset, testset
def distanceBetaand():
    """Compares, per sample, the Euclidean distance from the target-image
    feature to (a) the raw TIRG composed feature and (b) the Beta-projected
    composed feature, counting which variant lands closer.

    Runs once over the training split and once over the test queries, and
    prints the two counters for each.

    NOTE(review): `Distance` is pickled while still empty because the
    appends below are commented out — confirm whether that file is still
    needed. `batchsize` is assigned but never used; every sample is
    processed one at a time.
    """
    with open(Path1 + "/Beta.txt", 'rb') as fp:
        # Beta: the learned linear projection applied to composed features.
        Beta = pickle.load(fp)
    trainset = datasets.Fashion200k(
        path=Path1,
        split='train',
        transform=torchvision.transforms.Compose([
            torchvision.transforms.Resize(224),
            torchvision.transforms.CenterCrop(224),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                             [0.229, 0.224, 0.225])
        ]))
    test = datasets.Fashion200k(
        path=Path1,
        split='test',
        transform=torchvision.transforms.Compose([
            torchvision.transforms.Resize(224),
            torchvision.transforms.CenterCrop(224),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                             [0.229, 0.224, 0.225])
        ]))
    trig = img_text_composition_models.TIRG(
        [t.encode().decode('utf-8') for t in trainset.get_all_texts()], 512)
    # NOTE(review): Windows-style path — confirm this only runs on Windows.
    trig.load_state_dict(
        torch.load(Path1 + r'\fashion200k.tirg.iter160k.pth',
                   map_location=torch.device('cpu'))['model_state_dict'])
    trig.eval()
    imgs = []
    mods = []
    target = []
    batchsize = 2  # unused
    Distance = []
    sourceid = []
    targetid = []
    countbeta = 0   # samples where the Beta projection is closer to target
    counttrig = 0   # samples where the raw TIRG feature is closer
    for Data in tqdm(trainset):
        # Accumulate a "batch" of exactly one sample (lists are reset at
        # the end of every iteration).
        imgs += [Data['source_img_data']]
        mods += [Data['mod']['str']]
        target += [Data['target_img_data']]
        sourceid.append(Data['source_img_id'])
        targetid.append(Data['target_img_id'])
        imgs = torch.stack(imgs).float()
        imgs = torch.autograd.Variable(imgs)
        f = trig.compose_img_text(imgs, mods).data.cpu().numpy()
        target = torch.stack(target).float()
        target = torch.autograd.Variable(target)
        f2 = trig.extract_img_feature(target).data.cpu().numpy()
        trigdata = f[0]
        # Prepend the bias term before applying the affine Beta projection.
        trigbeta = np.insert(trigdata, 0, 1)
        trigbeta = np.matmul(trigbeta, Beta)
        Targetdata = f2[0]
        SourceTarget = euclideandistance(trigdata, Targetdata)
        betaTarget = euclideandistance(trigbeta, Targetdata)
        if (SourceTarget > betaTarget):
            countbeta = countbeta + 1
        else:
            counttrig = counttrig + 1
        # opsig={'source':sourceid[0],'target':targetid[0],'disbeta':betaTarget,'disorig':SourceTarget}
        # Distance.append(opsig )
        imgs = []
        mods = []
        target = []
        sourceid = []
        targetid = []
    # Distance is still [] here (appends above are commented out).
    with open(Path1 + r"/" + 'Distance.txt', 'wb') as fp:
        pickle.dump(Distance, fp)
    print('Train Data :Count beta less:', countbeta, ' ,countbeta bigger:',
          counttrig)
    # Reset all accumulators and repeat the comparison over test queries.
    imgs = []
    mods = []
    target = []
    batchsize = 2  # unused
    Distance = []
    sourceid = []
    targetid = []
    countbeta = 0
    counttrig = 0
    for Data in tqdm(test.get_test_queries()):
        imgs += [test.get_img(Data['source_img_id'])]
        mods += [Data['mod']['str']]
        target += [test.get_img(Data['target_id'])]
        imgs = torch.stack(imgs).float()
        imgs = torch.autograd.Variable(imgs)
        f = trig.compose_img_text(imgs, mods).data.cpu().numpy()
        target = torch.stack(target).float()
        target = torch.autograd.Variable(target)
        f2 = trig.extract_img_feature(target).data.cpu().numpy()
        trigdata = f[0]
        trigbeta = np.insert(trigdata, 0, 1)
        trigbeta = np.matmul(trigbeta, Beta)
        Targetdata = f2[0]
        SourceTarget = euclideandistance(trigdata, Targetdata)
        betaTarget = euclideandistance(trigbeta, Targetdata)
        if (SourceTarget > betaTarget):
            countbeta = countbeta + 1
        else:
            counttrig = counttrig + 1
        imgs = []
        mods = []
        target = []
        sourceid = []
        targetid = []
    print('Test Data :Count beta less:', countbeta, ' ,countbeta bigger:',
          counttrig)
def GetValuestrain15time():
    """Runs the retrieval evaluation 15 times over the training split and
    pickles the accumulated result strings to Results15time.txt.

    Each iteration records the Beta-normalized metrics and the as-published
    metrics. Failures of either evaluation are logged and skipped so one
    bad run does not abort the remaining iterations.
    """
    with open(Path1 + "/trainBetaNormalized.txt", 'rb') as fp:
        BetaNormalize = pickle.load(fp)
    # One shared preprocessing pipeline (the original built two identical
    # Compose objects).
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])
    trainset = datasets.Fashion200k(path=Path1, split='train',
                                    transform=transform)
    # NOTE(review): trainloader is built but never used below — confirm
    # whether it can be removed.
    trainloader = trainset.get_loader(batch_size=2,
                                      shuffle=True,
                                      drop_last=True,
                                      num_workers=0)
    testset = TestFashion200k(path=Path1, split='test', transform=transform)
    # get_all_texts() already yields str, so the former
    # encode().decode('utf-8') round trip was a no-op and is dropped.
    trig = TIRG(list(trainset.get_all_texts()), 512)
    trig.load_state_dict(
        torch.load(Path1 + r'\checkpoint_fashion200k.pth',
                   map_location=torch.device('cpu'))['model_state_dict'])
    # argparse.ArgumentParser used as a plain attribute bag; nothing is
    # parsed — the attributes assigned below are what the helpers read.
    opt = argparse.ArgumentParser()
    opt.add_argument('--batch_size', type=int, default=2)
    opt.add_argument('--dataset', type=str, default='fashion200k')
    opt.batch_size = 1
    opt.dataset = 'fashion200k'
    Results = []
    for i in range(15):
        # Only the train split is evaluated; add ('test', testset) here to
        # evaluate the test split as well.
        for name, dataset in [('train', trainset)]:
            try:
                betaNor = test_retrieval.testbetanormalizednot(
                    opt, trig, dataset, BetaNormalize)
                print(name, ' BetaNormalized: ', betaNor)
                Results.append('No.' + str(i) + ' DataSet=' + name +
                               ' Type= BetaNormalized ' + ' Result=' +
                               str(betaNor))
            except Exception:  # was a bare except: — narrowed
                print('ERROR')
            try:
                asbook = test_retrieval.test(opt, trig, dataset)
                print(name, ' As PaPer: ', asbook)
                # BUG FIX: the original appended betaNor here, mislabeling
                # the "As PaPer" record (and crashing with NameError if the
                # first try block had failed); record asbook instead.
                Results.append('No.' + str(i) + ' DataSet=' + name +
                               ' Type= As PaPer ' + ' Result=' +
                               str(asbook))
            except Exception:  # was a bare except: — narrowed
                print('ERROR')
    with open(Path1 + r"/" + 'Results15time.txt', 'wb') as fp:
        pickle.dump(Results, fp)
def getbetatrainNot():
    """Fits the linear 'Beta' mapping from L2-normalized composed query
    features to L2-normalized target-image features over the training set,
    via the normal equations, and pickles it to BetaNot.txt."""
    train = datasets.Fashion200k(
        path=Path1,
        split='train',
        transform=torchvision.transforms.Compose([
            torchvision.transforms.Resize(224),
            torchvision.transforms.CenterCrop(224),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                             [0.229, 0.224, 0.225])
        ]))
    trig = img_text_composition_models.TIRG(
        [t.encode().decode('utf-8') for t in train.get_all_texts()], 512)
    trig.load_state_dict(
        torch.load(Path1 + r'\fashion200k.tirg.iter160k.pth',
                   map_location=torch.device('cpu'))['model_state_dict'])
    trig.eval()
    trigdata = []
    imgdata = []
    # One sample at a time: compose (source image, modifier text) into a
    # query feature and extract the corresponding target-image feature.
    for sample in tqdm(train):
        src = torch.autograd.Variable(
            torch.stack([sample['source_img_data']]).float())
        composed = trig.compose_img_text(src, [sample['mod']['str']])
        trigdata.append(composed.data.cpu().numpy()[0])
        tgt = torch.autograd.Variable(
            torch.stack([sample['target_img_data']]).float())
        imgdata.append(trig.extract_img_feature(tgt).data.cpu().numpy()[0])
    Ntrigdata = np.array(trigdata)
    Nimgdata = np.array(imgdata)
    # L2-normalize every row of both feature matrices in place.
    for row in range(Ntrigdata.shape[0]):
        Ntrigdata[row, :] /= np.linalg.norm(Ntrigdata[row, :])
    for row in range(Nimgdata.shape[0]):
        Nimgdata[row, :] /= np.linalg.norm(Nimgdata[row, :])
    # Prepend a bias column of ones, then solve the normal equations:
    # Beta = (X^T X)^-1 X^T Y.
    Ntrig2 = np.array([np.insert(vec, 0, 1) for vec in Ntrigdata])
    Xt = Ntrig2.transpose()
    gram_inv = np.linalg.inv(np.matmul(Xt, Ntrig2))
    Nbeta = np.matmul(np.matmul(gram_inv, Xt), Nimgdata)
    with open(Path1 + r"/" + 'BetaNot.txt', 'wb') as fp:
        pickle.dump(Nbeta, fp)