def __init__(self, root, filename):
    """Load *filename* from directory *root* and cache its lines.

    Also records the grid dimensions configured in the global ``cfg``.
    """
    self.root = root
    self.path = os.path.join(root, filename)
    with open(self.path, "r") as handle:
        self.lines = handle.readlines()
    # NOTE: 'input_hight' is cfg's own (misspelled) attribute name.
    self.rows = cfg.input_hight
    self.cols = cfg.input_width
def read_data(files):
    """Split a tab-separated ``<label>\\t<text>`` file into train/val sets.

    The first MIN_TRAIN_NUMBER texts of each label go to training, the rest
    to validation.  Returns ``(val_data, train_data, labels)`` where *labels*
    maps each label name to a dense index in sorted-label order.
    """
    grouped = defaultdict(list)
    with open(files, 'r', encoding="utf-8") as fh:
        for raw in fh.readlines():
            parts = raw.strip().split("\t")
            if len(parts) == 2:
                grouped[parts[0]].append(parts[1])
            else:
                # Malformed row: echo it so the problem is visible.
                print(parts)
    train_data = {}
    val_data = {}
    label_names = set()
    for name, texts in grouped.items():
        cutoff = MIN_TRAIN_NUMBER
        train_data[name] = texts[:cutoff]
        val_data[name] = texts[cutoff:]
        label_names.add(name)
    labels = {name: idx for idx, name in enumerate(sorted(label_names))}
    return val_data, train_data, labels
def __init__(self, list_path):
    """Build a mapping from utterance id to its integer value.

    :param list_path: path to a text file whose lines are "<uttid> <int>".
    """
    self.uttid_dic = {}  # initialization
    # 'with' guarantees the file is closed even if int() raises on a bad
    # line -- the original open()/close() pair leaked the handle on error.
    with open(list_path, 'r') as f:
        for ln in f.readlines():
            temp_str = ln.split(' ', 1)
            self.uttid_dic[temp_str[0]] = int(temp_str[1])
def _get_data(self):
    """Parse the gzipped transcript at ``self.path`` into dialog turns.

    Each line is whitespace-split into four fields: the first three tokens,
    plus the remainder of the line re-joined as the utterance text.

    :return: list of ``[tok0, tok1, tok2, rest_of_line]`` entries.
    """
    # NOTE(review): gzip.open() without a mode yields bytes on Python 3;
    # the .split()/.join() below then operate on bytes.  The original also
    # ran str-prefix line.startswith('[') (a TypeError on bytes) purely to
    # fill a `data` list that was never used -- both removed here.
    with gzip.open(self.path) as f:
        dialog = []
        for line in f.readlines():
            line = line.rstrip()
            line = line.split()
            dialog.append(
                [line[0], line[1], line[2], " ".join(line[3:])])
    return dialog
def __init__(self):
    """Load data/labels, shuffle them in lockstep, batch them, and build
    character/language index maps.  Relies on the module-global ``args``
    namespace and on ``self.read_file`` / ``self.grouper`` helpers defined
    elsewhere in the class.
    """
    self.data = self.read_file(args.in_dir + args.data)
    self.labels = self.read_file(args.in_dir + args.labels)
    # Seeding two independent Random instances with the same value makes
    # both shuffles apply the identical permutation, keeping data[i]
    # aligned with labels[i].
    fixed_seed = rd.random()
    rd.Random(fixed_seed).shuffle(self.data)
    rd.Random(fixed_seed).shuffle(self.labels)
    data_batches = self.grouper(self.data, args.batch_size)
    labels_batches = self.grouper(self.labels, args.batch_size)
    # grouper presumably pads the last batch with None (itertools-style
    # fill) -- strip the padding when the data doesn't divide evenly.
    if len(self.data) % args.batch_size != 0:
        data_batches[-1] = [
            paragraph for paragraph in data_batches[-1]
            if paragraph is not None
        ]
        labels_batches[-1] = [
            label for label in labels_batches[-1] if label is not None
        ]
    self.data_batches = data_batches
    self.labels_batches = labels_batches
    self.vocabulary = self.read_file(args.v_dir + args.vocab)
    # Bidirectional character <-> index maps over the vocabulary.
    self.char_to_idx = {c: i for i, c in enumerate(self.vocabulary)}
    self.idx_to_char = {i: c for i, c in enumerate(self.vocabulary)}
    self.num_batches = len(self.data_batches)
    self.next_batch = 0  # cursor for sequential batch retrieval
    languages = list(set(self.labels))
    languages.sort()
    self.languages = languages
    # The authoritative language set comes from the WiLI test-label file,
    # not from the (possibly partial) training labels loaded above.
    with open('data/wili-2018/y_test_clean.txt', 'r') as f:
        real_languages = f.readlines()
    # [:-1] drops the trailing newline of each line.
    self.real_languages = [language[:-1] for language in real_languages]
    self.real_languages = sorted(list(set(self.real_languages)))
    self.lang_to_idx = {l: i for i, l in enumerate(self.real_languages)}
    self.idx_to_lang = {i: l for i, l in enumerate(self.real_languages)}
    # Unknown characters fall back to the index of '0'.  The lambda reads
    # self.char_to_idx lazily, i.e. it resolves against this defaultdict
    # itself at miss time -- so '0' must be present in the vocabulary.
    self.char_to_idx = defaultdict(lambda: self.char_to_idx['0'],
                                   self.char_to_idx)
    # The labels-derived list computed above is overwritten here;
    # real_languages is the set actually used downstream.
    self.languages = self.real_languages
    print(len(self.languages))
def load_data(path):
    """Read every author file under *path* and pair each line with its label.

    :param path: directory containing ``<AUTHOR>.txt`` files (e.g. LX.txt).
    :return: list of ``(sentence, label_index)`` tuples; sentences keep
        their trailing newlines.
    """
    # Mapping from author file stem to class index.
    labels = {'LX': 0, 'MY': 1, 'QZS': 2, 'WXB': 3, 'ZAL': 4}
    sentences = []  # text fragments
    target = []     # author indices
    for file in os.listdir(path):
        full = os.path.join(path, file)
        # Bug fix: the original tested os.path.isdir(file), which resolves
        # relative to the current working directory instead of *path*.
        if not os.path.isdir(full):
            # 'with' closes the handle; the original leaked the file object.
            with open(full, 'r', encoding='UTF-8') as f:
                for line in f.readlines():
                    sentences.append(line)
                    target.append(labels[file[:-4]])  # strip ".txt" (e.g. LX.txt)
    return list(zip(sentences, target))
def read_data_path(file_name):
    """Parse an index file of "<image> <label>" lines into parallel lists.

    :param file_name: path to a whitespace-separated index file.
    :return: (image_path_list, integer_label_list)
    """
    img_list = []
    label_list = []
    with open(file_name) as fh:
        for raw in fh.readlines():
            fields = raw.strip('\n').split(' ')
            img_list.append(fields[0])
            label_list.append(int(fields[1]))
    print('the number of sample: ', len(img_list))
    print('Done.')
    return img_list, label_list
def openTxt(self, MSEfile):
    """Read one float per line from *MSEfile* and return them as a list."""
    with open(MSEfile) as fh:
        raw_lines = fh.readlines()
    return [float(entry.strip()) for entry in raw_lines]
def main(dataset_name=None):
    """Evaluate harmonization results for one dataset: MSE/PSNR/SSIM plus
    their foreground-masked variants (fMSE/fPSNR/fSSIM), averaged over all
    images listed in ``<dataroot><dataset>_<phase>.txt``.

    :param dataset_name: dataset key used to locate the split list file.
    :return: (mean MSE, mean fMSE, mean PSNR, mean fPSNR)
    """
    # NOTE(review): `cuda`, `IMAGE_SIZE`, and `sk_mse_scores` below are
    # assigned but never used; tensors are moved with .cuda() regardless.
    cuda = True if torch.cuda.is_available() else False
    IMAGE_SIZE = np.array([256, 256])
    opt.dataset_name = dataset_name
    files = opt.dataroot + opt.dataset_name + '_' + opt.phase + '.txt'
    comp_paths = []
    harmonized_paths = []
    mask_paths = []
    real_paths = []
    with open(files, 'r') as f:
        for line in f.readlines():
            name_str = line.rstrip()
            if opt.evaluation_type == 'our':
                # 'our': all four images live in result_root with suffixes
                # derived from the composite's name.
                harmonized_path = os.path.join(
                    opt.result_root,
                    name_str.replace(".jpg", "_harmonized.jpg"))
                if os.path.exists(harmonized_path):
                    real_path = os.path.join(
                        opt.result_root,
                        name_str.replace(".jpg", "_real.jpg"))
                    mask_path = os.path.join(
                        opt.result_root,
                        name_str.replace(".jpg", "_mask.jpg"))
                    comp_path = os.path.join(
                        opt.result_root,
                        name_str.replace(".jpg", "_comp.jpg"))
            elif opt.evaluation_type == 'ori':
                # 'ori': evaluate the raw composites against the originals.
                # iHarmony-style names encode "<real>_<fg>_<variant>.jpg";
                # stripping the trailing parts recovers real/mask names.
                comp_path = os.path.join(opt.dataroot, 'composite_images',
                                         line.rstrip())
                harmonized_path = comp_path
                if os.path.exists(comp_path):
                    real_path = os.path.join(opt.dataroot, 'real_images',
                                             line.rstrip())
                    name_parts = real_path.split('_')
                    real_path = real_path.replace(
                        ('_' + name_parts[-2] + '_' + name_parts[-1]),
                        '.jpg')
                    mask_path = os.path.join(opt.dataroot, 'masks',
                                             line.rstrip())
                    mask_path = mask_path.replace(('_' + name_parts[-1]),
                                                  '.png')
            # NOTE(review): if the os.path.exists() guard above is False for
            # the very first line (or evaluation_type matches neither
            # branch), real_path/mask_path/comp_path are unbound here and
            # this raises; later lines silently reuse stale paths -- confirm
            # intended.
            real_paths.append(real_path)
            mask_paths.append(mask_path)
            comp_paths.append(comp_path)
            harmonized_paths.append(harmonized_path)
    # Running sums for the per-image metrics, averaged after the loop.
    count = 0
    mse_scores = 0
    sk_mse_scores = 0
    fmse_scores = 0
    psnr_scores = 0
    fpsnr_scores = 0
    ssim_scores = 0
    fssim_scores = 0
    fore_area_count = 0
    fmse_score_list = []  # per-image records; built but not returned
    image_size = 256
    for i, harmonized_path in enumerate(tqdm(harmonized_paths)):
        count += 1
        harmonized = Image.open(harmonized_path).convert('RGB')
        real = Image.open(real_paths[i]).convert('RGB')
        mask = Image.open(mask_paths[i]).convert('1')  # binary mask
        # Resize everything to the 256x256 evaluation resolution if needed.
        if mask.size[0] != image_size:
            harmonized = tf.resize(harmonized, [image_size, image_size],
                                   interpolation=Image.BICUBIC)
            mask = tf.resize(mask, [image_size, image_size],
                             interpolation=Image.BICUBIC)
            real = tf.resize(real, [image_size, image_size],
                             interpolation=Image.BICUBIC)
        harmonized_np = np.array(harmonized, dtype=np.float32)
        real_np = np.array(real, dtype=np.float32)
        harmonized = tf.to_tensor(harmonized_np).unsqueeze(0).cuda()
        real = tf.to_tensor(real_np).unsqueeze(0).cuda()
        mask = tf.to_tensor(mask).unsqueeze(0).cuda()
        mse_score = mse(harmonized_np, real_np)
        psnr_score = psnr(real_np, harmonized_np, data_range=255)
        # Foreground MSE: masked squared error re-normalized by the
        # foreground pixel count instead of the full 256*256 image area.
        fore_area = torch.sum(mask)
        fmse_score = torch.nn.functional.mse_loss(
            harmonized * mask, real * mask) * 256 * 256 / fore_area
        mse_score = mse_score.item()
        fmse_score = fmse_score.item()
        fore_area_count += fore_area.item()
        fpsnr_score = 10 * np.log10((255**2) / fmse_score)
        ssim_score, fssim_score = pytorch_ssim.ssim(
            harmonized, real, window_size=opt.ssim_window_size, mask=mask)
        psnr_scores += psnr_score
        mse_scores += mse_score
        fmse_scores += fmse_score
        fpsnr_scores += fpsnr_score
        ssim_scores += ssim_score
        fssim_scores += fssim_score
        image_name = harmonized_path.split("/")
        # NOTE(review): the last field uses the *cumulative* fpsnr_scores
        # sum, not this image's fpsnr_score -- looks like a bug; the other
        # fields are all per-image values.
        image_fmse_info = (image_name[-1], round(fmse_score, 2),
                           fore_area.item(), round(mse_score, 2),
                           round(psnr_score, 2), round(fpsnr_scores, 4))
        fmse_score_list.append(image_fmse_info)
    # Dataset-level means (raises ZeroDivisionError if no images matched).
    mse_scores_mu = mse_scores / count
    psnr_scores_mu = psnr_scores / count
    fmse_scores_mu = fmse_scores / count
    fpsnr_scores_mu = fpsnr_scores / count
    ssim_scores_mu = ssim_scores / count
    fssim_score_mu = fssim_scores / count
    print(count)
    mean_sore = "%s MSE %0.2f | PSNR %0.2f | SSIM %0.4f |fMSE %0.2f | fPSNR %0.2f | fSSIM %0.4f" % (
        opt.dataset_name, mse_scores_mu, psnr_scores_mu, ssim_scores_mu,
        fmse_scores_mu, fpsnr_scores_mu, fssim_score_mu)
    print(mean_sore)
    return mse_scores_mu, fmse_scores_mu, psnr_scores_mu, fpsnr_scores_mu