Exemple #1
0
def test_image(args, model):
    """Segment a single image file and optionally save and/or display it.

    Reads these attributes from ``args``: ``detector`` ('dlib',
    'faceboxes' or anything else for no detector), ``gpu``, ``image``
    (input path), ``resize`` (applied only when > 0),
    ``remove_small_area``, ``save`` (output path, falsy to skip) and
    ``unshow`` (truthy to suppress the preview windows).

    ``model`` is switched to evaluation mode and handed to ``Segmenter``.
    """
    use_dlib = args.detector == 'dlib'
    use_faceboxes = (not use_dlib) and args.detector == 'faceboxes'

    # The detector backends are optional dependencies, so only the one
    # that was requested is imported.
    if use_dlib:
        import dlib
    elif use_faceboxes:
        from utils.face_detector import FaceDetectorFaceboxes

    model.eval()

    device = torch.device("cuda" if args.gpu else "cpu")

    image = Image.open(args.image).convert('RGB')
    if args.resize > 0:
        image = resize(image, args.resize)

    if use_dlib:
        detector = dlib.get_frontal_face_detector()
    elif use_faceboxes:
        detector = FaceDetectorFaceboxes(
            'model/faceboxes.pb',
            gpu_memory_fraction=0.25,
            visible_device_list='0',
        )
    else:
        detector = None

    segmenter = Segmenter(model, device, detector, mode=args.detector)

    # The segmenter works on OpenCV images; convert there and back.
    segmented = segmenter.segment(PIL2opencv(image), args.remove_small_area)
    result = opencv2PIL(segmented)

    if args.save:
        result.save(args.save)

    if args.unshow:
        return

    result.show()
    image.show()
Exemple #2
0
def test_video(args, model):
    """Run the segmenter on every frame of a video source.

    Reads these attributes from ``args``: ``video`` (a path/URL, or the
    string '0' for the default camera), ``detector`` ('dlib', 'faceboxes'
    or anything else for no detector), ``gpu``, ``resize`` (length of the
    short side of the output frames; <= 0 keeps the source size),
    ``remove_small_area``, ``save`` (output video path, falsy to skip) and
    ``unshow`` (truthy to suppress the preview window).

    Processing stops when the source runs out of frames or 'q' is pressed.
    The capture, the writer and any preview windows are released even if
    segmentation raises.
    """
    # Import the optional detector backend lazily, the same way test_image
    # does, so the name is defined here and unused backends are not needed.
    detector = None
    if args.detector == 'dlib':
        import dlib
        detector = dlib.get_frontal_face_detector()
    elif args.detector == 'faceboxes':
        from utils.face_detector import FaceDetectorFaceboxes
        model_path = 'model/faceboxes.pb'
        detector = FaceDetectorFaceboxes(
            model_path, gpu_memory_fraction=0.25, visible_device_list='0')

    # Inference only: put the model in evaluation mode, as test_image does.
    model.eval()
    device = torch.device("cuda" if args.gpu else "cpu")
    segmenter = Segmenter(model, device, detector, mode=args.detector)

    cap = cv2.VideoCapture(0 if args.video == '0' else args.video)
    out = None
    try:
        src_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        src_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        print(src_w, src_h)

        # Some sources report 0x0; in that case frames are left untouched
        # rather than dividing by zero or resizing to an empty size.
        size_known = src_w > 0 and src_h > 0

        # OpenCV sizes are (width, height). Keep the aspect ratio and make
        # the shorter side equal to args.resize when resizing is requested.
        if args.resize > 0 and size_known:
            short_side = args.resize
            if src_w > src_h:
                frame_size = (int(src_w * short_side / src_h), short_side)
            else:
                frame_size = (short_side, int(src_h * short_side / src_w))
        else:
            frame_size = (src_w, src_h)

        if args.save:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            out = cv2.VideoWriter(args.save, fourcc, 20, frame_size, True)

        while True:
            frame = cap.read()[1]
            if frame is None:
                break

            if size_known:
                frame = cv2.resize(frame, frame_size)

            result = segmenter.segment(frame, args.remove_small_area)

            if out is not None:
                out.write(result)

            if not args.unshow:
                cv2.imshow('image', result)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
                if mode == 1:
                    yield (X_batch, y_batch)
                else:
                    yield X_batch

                # clear the batch matrices for the next batch
                X_batch.fill(0)
                y_batch.fill(0)
                batch_index = 0


# Load the pre-trained word vectors. Files whose name ends in '.txt' are read
# as text-format word2vec, everything else as binary.
w2v = gensim.models.KeyedVectors.load_word2vec_format(
    word2vector_path, binary=not word2vector_path.endswith('.txt'))
# Dimensionality of one word vector. `vector_size` is used instead of
# `len(w2v.syn0[0])` because `syn0` is deprecated and absent in gensim 4.
w2v_dims = w2v.vector_size

segmenter = Segmenter()
tokenizer = Tokenizer()

print('Collecting samples...')
samples = []
# The vocabulary starts with the padding token only.
all_words = {PAD_WORD}
max_phrase_len = 0

if True:
    # add premise-question pairs from the training dataset

    with codecs.open(os.path.join(data_folder, qa_path), "r", "utf-8") as inf:

        loading_state = 'T'

        text = []
Exemple #4
0
        sys.stdout.encoding).strip().lower()

    phrases1 = []
    segm_mode = raw_input(
        'Use EOL markers (1) or segmenter (2) to split file to sentences?'
    ).strip()

    max_nb_facts = int(
        raw_input(
            'maximum number of samples to read from file (-1 means all):\n> ').
        strip())
    if max_nb_facts == -1:
        max_nb_facts = 10000000

    if segm_mode == 2:
        segmenter = Segmenter()
        phrases0 = segmenter.split(
            codecs.open(path1, 'r', 'utf-8').readlines())
        for phrase in enumerate(phrases):
            words = tokenizer.tokenize(phrase)
            if len(words) > 0:
                phrases1.append(words)
            if len(phrases1) >= max_nb_facts:
                break
    else:
        with codecs.open(path1, 'r', 'utf-8') as rdr:
            for phrase in rdr:
                words = tokenizer.tokenize(phrase)
                if len(words) > 0:
                    phrases1.append(words)
                if len(phrases1) >= max_nb_facts:
Exemple #5
0
# Created on: 00:59:12
import os
import sys
import numpy as np
import gensim
from copy import deepcopy
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
from config import Config
from utils import log
from utils.segmenter import Segmenter
from utils.tfidf import word_idf, sif_embedding, get_weighted_average
from utils.tools import load_embedding, cosine

# Module-level objects, created once when this module is imported.
config = Config()  # project configuration (config.Config)
cut = Segmenter()  # segmenter instance from utils.segmenter
logger = log.getLogger(__name__)  # logger named after this module, via utils.log


class Embedding(object):
    def __init__(self, embedding_path, documents, data_seg):
        # default pre-trained word embedding
        self._word_embedding = gensim.models.KeyedVectors.load_word2vec_format(
            embedding_path, binary=True)
        # Topic clusters documents
        self.documents = documents
        self.data_seg = data_seg
        self.weights_of_words = word_idf(self.documents)
        # initized some components for sentence embedding
        self.word2idx, self.wv_mat = None, None
        self._word2vec()