def load_given_tokens(tokens=None, randomize=True):
    """Lazily yield (token, data_stream) pairs for the requested tokens.

    @param tokens: None for loading all instances (falls back to get_tokens())
    @type tokens: list(str)
    @param randomize: if True, yield the tokens in shuffled order
    @return: generator of (token, data_stream); data_stream is itself a
        generator and performs no I/O until consumed
    """
    def get_data_stream(token):
        """Yield (img, coords) per annotation line of *token*.

        coords are the 4 box corners normalised to [0, 1] by image size.
        """
        def process_line(line):
            # Annotation rows may be comma- or tab-separated; pick whichever
            # separator actually occurs in the line.
            sep = "," if "," in line else "\t"
            # Materialise as a list so the caller can unpack/re-read safely
            # (a lazy map object is single-use under Python 3).
            return [int(v) for v in line.split(sep)]

        img_dir = get_images_directory(token)
        # Context manager closes the annotation file even if the consumer
        # abandons the generator (the original leaked the handle).
        with open(token) as ann_file:
            for idx, line in enumerate(ann_file):
                # Frames are 1-based, zero-padded to 4 digits on disk.
                img = cv2.imread(os.path.join(img_dir, "%04d.jpg" % (idx + 1)))
                x, y, w, h = process_line(line.strip())
                corners = [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
                # Fresh loop names avoid shadowing the x/y unpacked above.
                coords = [(float(cx) / img.shape[1], float(cy) / img.shape[0])
                          for cx, cy in corners]
                yield (img, coords)

    if tokens is None:
        tokens = get_tokens()
    if randomize:
        # Shuffle a copy so the caller's list is not mutated in place.
        tokens = list(tokens)
        random.shuffle(tokens)
    for token in tokens:
        yield (token, get_data_stream(token))
def load_given_tokens(tokens=None, randomize=True):
    """Lazily yield (token, data_stream) pairs for the requested tokens.

    @param tokens: None for loading all instances (falls back to get_tokens())
    @type tokens: list(str)
    @param randomize: if True, yield the tokens in shuffled order
    @return: generator of (token, data_stream); data_stream is itself a
        generator and performs no I/O until consumed
    """
    def get_data_stream(token):
        """Yield (img, coords) per annotation line of *token*.

        coords are 4 polygon corners normalised to [0, 1] by image size.
        """
        img_dir = get_images_directory(token)
        # Context manager closes the annotation file even if the consumer
        # abandons the generator (the original leaked the handle).
        with open(token) as ann_file:
            for idx, line in enumerate(ann_file):
                # Frames are 1-based, zero-padded to 8 digits on disk.
                img = cv2.imread(os.path.join(img_dir, "%08d.jpg" % (idx + 1)))
                # int(float(v)) tolerates values written as e.g. "12.0".
                # A list (not a lazy map object) is required: Python 3 map
                # objects are not subscriptable, so arr[::2] would raise.
                arr = [int(float(v)) for v in line.strip().split(",")]
                # Pair flat values as (x, y) points; list() because Python 3
                # zip returns an iterator that cannot be sliced below.
                coords = list(zip(arr[::2], arr[1::2]))
                # Rotate by one so stored order matches the expected corner
                # order — TODO(review): confirm against the annotation format.
                coords = coords[1:] + coords[:1]
                coords = [(float(x) / img.shape[1], float(y) / img.shape[0])
                          for x, y in coords]
                yield (img, coords)

    if tokens is None:
        tokens = get_tokens()
    if randomize:
        # Shuffle a copy so the caller's list is not mutated in place.
        tokens = list(tokens)
        random.shuffle(tokens)
    for token in tokens:
        yield (token, get_data_stream(token))
def load_given_tokens(tokens=None, randomize=True):
    """Yield (token, data_stream) pairs for the requested tokens.

    @param tokens: None for loading all instances
    @type tokens: list(str)
    @return: generator of (token, data_stream)
    """
    def _frames(token):
        """Yield (img, coords) pairs; coords are scaled to [0, 1]."""
        frame_dir = get_images_directory(token)
        for frame_no, pts in load_annotations(token):
            frame = cv2.imread(os.path.join(frame_dir, "%08d.jpg" % frame_no))
            height = frame.shape[0]
            width = frame.shape[1]
            scaled = [(float(px) / width, float(py) / height) for px, py in pts]
            yield (frame, scaled)

    if tokens is None:
        tokens = get_tokens()
    if randomize:
        random.shuffle(tokens)
    for token in tokens:
        yield (token, _frames(token))
if not word == " ": st.append(offset) cutw.append((word, POS.index(flag))) offset += len(word) cut_result[f_no] = [st, cutw] return cut_result #导入数据 data = ace_data.load() docs = data["docs"] nes = data["nes"] res = data["res"] #导入词袋 tokens = token.get_tokens() #文本分词 seg_docs = get_seg(docs) #获取各种的类型对应表list el = sorted(list(set([x[1] for f in nes.values() for x in f.values()]))) esl = sorted(list(set([x[-1] for f in nes.values() for x in f.values()]))) rl = sorted(list(set([x[1] for f in res.values() for x in f.values()]))) rsl = sorted(list(set([x[2] for f in res.values() for x in f.values()]))) #提取特征过程 w = 2 features = {} lables = {} for f_no in res: