Esempio n. 1
0
def load_given_tokens(tokens=None, randomize=True):
    """
    Lazily iterate over tokens, pairing each with a stream of labelled frames.

    Each token is used as the path of an annotation text file (it is passed
    to open()).  Line i (0-based) of that file holds one box "x,y,w,h",
    comma- or tab-separated, for the image "%04d.jpg" % (i + 1) inside the
    directory returned by get_images_directory(token).

    @param tokens: None for loading all instances (as returned by
        get_tokens()); otherwise the tokens to load.  The sequence passed in
        is never reordered.
    @type tokens: list(str)
    @param randomize: when true, visit the tokens in a random order
    @type randomize: bool
    @return: generator of (token, data_stream)
    """
    def parse_box(line):
        """
        @return: the integer fields of one annotation line
        """
        # A line is comma-separated if it contains a comma, tab-separated
        # otherwise.
        separator = "," if "," in line else "\t"
        return [int(field) for field in line.split(separator)]

    def get_data_stream(token):
        """
        @return: generator of (img, [4 corner points]); the corners are
            (x, y) pairs scaled by the image width and height
        """
        img_dir = get_images_directory(token)
        # The context manager closes the annotation file once the stream is
        # exhausted or the generator is closed / garbage-collected.
        with open(token) as annotation_file:
            for idx, line in enumerate(annotation_file):
                # Frame files are numbered from 1, annotation lines from 0.
                # NOTE(review): cv2.imread returns None for an unreadable
                # file, in which case img.shape below raises AttributeError.
                img = cv2.imread(os.path.join(img_dir, "%04d.jpg" % (idx + 1)))
                x, y, w, h = parse_box(line.strip())
                height, width = img.shape[:2]
                # Corners in the order: (x, y), (x + w, y), (x + w, y + h),
                # (x, y + h).
                corners = [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
                yield (img, [(float(px) / width, float(py) / height)
                             for px, py in corners])

    if tokens is None:
        tokens = get_tokens()
    if randomize:
        # Shuffle a private copy so the caller's list (or the list returned
        # by get_tokens()) keeps its order; this also accepts tuples.
        tokens = list(tokens)
        random.shuffle(tokens)
    for token in tokens:
        yield (token, get_data_stream(token))
Esempio n. 2
0
def load_given_tokens(tokens=None, randomize=True):
    """
    Lazily iterate over tokens, pairing each with a stream of labelled frames.

    Each token is used as the path of an annotation text file (it is passed
    to open()).  Line i (0-based) of that file holds comma-separated numbers
    read as consecutive (x, y) pairs, for the image "%08d.jpg" % (i + 1)
    inside the directory returned by get_images_directory(token).

    @param tokens: None for loading all instances (as returned by
        get_tokens()); otherwise the tokens to load.  The sequence passed in
        is never reordered.
    @type tokens: list(str)
    @param randomize: when true, visit the tokens in a random order
    @type randomize: bool
    @return: generator of (token, data_stream)
    """

    def get_data_stream(token):
        """
        @return: generator of (img, [points]); the points are (x, y) pairs
            scaled by the image width and height, rotated so that the first
            point of the annotation line comes last
        """
        img_dir = get_images_directory(token)
        # The context manager closes the annotation file once the stream is
        # exhausted or the generator is closed / garbage-collected.
        with open(token) as annotation_file:
            for idx, line in enumerate(annotation_file):
                # Frame files are numbered from 1, annotation lines from 0.
                # NOTE(review): cv2.imread returns None for an unreadable
                # file, in which case img.shape below raises AttributeError.
                img = cv2.imread(os.path.join(img_dir, "%08d.jpg" % (idx + 1)))
                # Values may be written as floats; truncate them to ints.
                # Real lists are built because map() and zip() return
                # non-indexable iterators on Python 3.
                values = [int(float(field))
                          for field in line.strip().split(",")]
                points = list(zip(values[::2], values[1::2]))
                # Rotate left by one: the first point moves to the end.
                points = points[1:] + points[:1]
                height, width = img.shape[:2]
                yield (img, [(float(px) / width, float(py) / height)
                             for px, py in points])

    if tokens is None:
        tokens = get_tokens()
    if randomize:
        # Shuffle a private copy so the caller's list (or the list returned
        # by get_tokens()) keeps its order; this also accepts tuples.
        tokens = list(tokens)
        random.shuffle(tokens)
    for token in tokens:
        yield (token, get_data_stream(token))
Esempio n. 3
0
def load_given_tokens(tokens=None, randomize=True):
    """
    Lazily iterate over tokens, pairing each with a stream of labelled frames.

    For every token, load_annotations(token) supplies (frame index, points)
    pairs; the matching image is "%08d.jpg" % frame_index inside the
    directory returned by get_images_directory(token).

    @param tokens: None for loading all instances (as returned by
        get_tokens()); otherwise the tokens to load.  The sequence passed in
        is never reordered.
    @type tokens: list(str)
    @param randomize: when true, visit the tokens in a random order
    @type randomize: bool
    @return: generator of (token, data_stream)
    """
    def get_data_stream(token):
        """
        @return: generator of (img, [points]); the points are (x, y) pairs
            scaled by the image width and height
        """
        img_dir = get_images_directory(token)
        for f_idx, points in load_annotations(token):
            # NOTE(review): cv2.imread returns None for an unreadable file,
            # in which case img.shape below raises AttributeError.
            img = cv2.imread(os.path.join(img_dir, "%08d.jpg" % f_idx))
            height, width = img.shape[:2]
            yield (img, [(float(px) / width, float(py) / height)
                         for px, py in points])

    if tokens is None:
        tokens = get_tokens()
    if randomize:
        # Shuffle a private copy so the caller's list (or the list returned
        # by get_tokens()) keeps its order; this also accepts tuples.
        tokens = list(tokens)
        random.shuffle(tokens)
    for token in tokens:
        yield (token, get_data_stream(token))
Esempio n. 4
0
            if not word == " ":
                st.append(offset)
                cutw.append((word, POS.index(flag)))
            offset += len(word)
        cut_result[f_no] = [st, cutw]
    return cut_result


# Load the data set and pull out its three sections.
data = ace_data.load()
docs, nes, res = data["docs"], data["nes"], data["res"]

# Load the bag of words.
tokens = token.get_tokens()

# Segment the document text into words.
seg_docs = get_seg(docs)


def _distinct_sorted(table, field):
    """Return the sorted distinct values of entry[field] over a two-level mapping."""
    return sorted({entry[field]
                   for group in table.values()
                   for entry in group.values()})


# Build the lookup list for each kind of type label: every distinct value
# found at a fixed position of the entries in `nes` and `res`.
el = _distinct_sorted(nes, 1)
esl = _distinct_sorted(nes, -1)
rl = _distinct_sorted(res, 1)
rsl = _distinct_sorted(res, 2)

# Feature extraction.
w = 2  # presumably a context window size -- TODO confirm against its use
# NOTE(review): "lables" looks like a typo for "labels"; the name is left
# unchanged because code outside this span may refer to it.
features, lables = {}, {}
for f_no in res: