import os
import pickle
import re
from io import BytesIO
from itertools import chain

import numpy as np
import praw
from bs4 import BeautifulSoup
from PIL import Image
from tornado import gen
from tornado.httpclient import HTTPError

# Module-level collaborators assumed to be defined elsewhere in the project
# (not shown in this excerpt):
#   httpclient   - a tornado AsyncHTTPClient instance used for async fetches
#   detector     - a dlib-style face detector; detector.run() returns
#                  (rects, scores, poses)
#   openface     - helper exposing hash_face(image, bb=...) as a coroutine
#   unique_id, normalize_url, skip_unfound - project helper functions


# Assumed: the yields below are Tornado-style, so the coroutine decorator is
# added here to make the yielded futures resolve.
@gen.coroutine
def process_actor(name,
                  gender,
                  url,
                  data_path='./data/pr0n_processor/backend/'):
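    """Fetch an actor page, pull its "Portrait of" image, and cache face data.

    The image is kept only when exactly one face is detected; the JPEG and a
    pickle of its metadata (rect, pose, score, face hash, name, gender) are
    written under data_path, keyed by unique_id(url).
    """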
    uid = unique_id(url)
    os.makedirs(os.path.join(data_path, uid[0], uid[1]), exist_ok=True)
    filepath = os.path.join(data_path, uid[0], uid[1], uid)
    if os.path.exists(filepath + '.pkl'):
        return

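    # Fetch the actor page and locate the "Portrait of" image tag.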
    try:
        page_data = yield httpclient.fetch(url, request_timeout=10)
    except HTTPError:
        return
    soup = BeautifulSoup(page_data.body, 'html.parser')
    img = soup.find('img', title=re.compile("Portrait of"))
    if img is None:
        return
    img_src = img.attrs['src']

    try:
        image_req = yield httpclient.fetch(img_src, request_timeout=10)
    except HTTPError:
        return
    image_fd = BytesIO(image_req.body)
    try:
        image = Image.open(image_fd)
    except OSError:
        return
    image_np = np.array(image)
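    # Run the face detector; continue only if exactly one face is found.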
    try:
        rects, scores, poses = detector.run(image_np)
    except RuntimeError:
        return
    if len(scores) != 1:
        return
    try:
        face_hash = yield openface.hash_face(image_np, bb=rects[0])
    except Exception:
        return
    data = {
        'url': url,
        'uid': uid,
        'rects': rects[0],
        'pose': poses[0],
        'score': scores[0],
        'face_hash': face_hash,
        'name': name,
        'gender': gender,
    }
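    # Persist the portrait and its metadata side by side under data_path.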
    try:
        image.save(filepath + '.jpg')
        with open(filepath + '.pkl', 'wb+') as fd:
            pickle.dump(data, fd, protocol=-1)
        print(name, gender, url, uid)
    except OSError:
        return
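

# A minimal fan-out sketch (not part of the original module): because
# process_actor is a coroutine, several actors can be processed concurrently
# by yielding a list of its futures from another coroutine. The `actors`
# iterable of (name, gender, url) tuples is a hypothetical input.
@gen.coroutine
def process_actors(actors):
    # Tornado waits on every future in a yielded list before resuming.
    yield [process_actor(name, gender, url) for name, gender, url in actors]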


@gen.coroutine
def process_subreddit(subreddit, data_path='./data/pr0n_processor/backend/'):
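    """Scan a subreddit's hot/top listings and cache single-face images.

    For each submission whose image is not already cached, fetch it, keep it
    only when exactly one face is detected, compute its face hash, and write
    the JPEG plus a pickle of its metadata under data_path.
    """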
    reddit = praw.Reddit(user_agent='gulperpr0n')
    submissions = skip_unfound(chain(
        reddit.get_subreddit(subreddit).get_hot(),
        reddit.get_subreddit(subreddit).get_top_from_all(),
        reddit.get_subreddit(subreddit).get_top_from_year(),
        reddit.get_subreddit(subreddit).get_top_from_month(),
    ))
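    # Walk each submission, skipping URLs the helpers reject and entries
    # already cached on disk.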
    for submission in submissions:
        try:
            url = normalize_url(submission.url)
            uid = unique_id(url)
        except ValueError:
            continue
        os.makedirs(os.path.join(data_path, uid[0], uid[1]), exist_ok=True)
        filepath = os.path.join(data_path, uid[0], uid[1], uid)
        if os.path.exists(filepath + '.pkl'):
            continue
        try:
            image_req = yield httpclient.fetch(url, request_timeout=5)
        except HTTPError:
            continue
        image_fd = BytesIO(image_req.body)
        try:
            image = Image.open(image_fd)
        except OSError:
            continue
        image_np = np.array(image)
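        # Keep the image only if the detector finds exactly one face.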
        try:
            rects, scores, poses = detector.run(image_np)
        except RuntimeError:
            continue
        if len(scores) != 1:
            continue
        try:
            face_hash = yield openface.hash_face(image_np, bb=rects[0])
        except Exception:
            continue
        print(subreddit, uid, url)
        data = {
            'url': url,
            'uid': uid,
            'rects': rects[0],
            'pose': poses[0],
            'score': scores[0],
            'face_hash': face_hash,
            'reddit_submission': submission,
        }
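        # Persist the image and metadata; skip the submission on write errors.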
        try:
            image.save(filepath + '.jpg')
            with open(filepath + '.pkl', 'wb+') as fd:
                pickle.dump(data, fd, protocol=-1)
        except OSError:
            continue
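

# A minimal driver sketch (not part of the original module): run one crawl to
# completion on the IOLoop. 'example_subreddit' is a placeholder, and the
# module-level helpers noted at the top must exist for this to actually run.
if __name__ == '__main__':
    from tornado.ioloop import IOLoop
    IOLoop.current().run_sync(lambda: process_subreddit('example_subreddit'))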