def process_actor(name, gender, url,
                  data_path='./data/pr0n_processor/backend/'):
    """Download an actor's portrait, run face detection on it and persist it.

    Generator-style coroutine: it yields the results of ``httpclient.fetch``
    and ``openface.hash_face`` (presumably futures resolved by a coroutine
    decorator applied outside this block -- confirm at the definition site).

    The work is skipped when ``<uid>.pkl`` already exists under ``data_path``.
    On success two files are written next to each other:

    * ``<uid>.jpg`` -- the portrait as opened by ``Image.open``
    * ``<uid>.pkl`` -- a pickled dict with the keys ``url``, ``uid``,
      ``rects``, ``pose``, ``score``, ``face_hash``, ``name`` and ``gender``

    Every failure (HTTP error, missing portrait tag, unreadable image,
    detector error, anything other than exactly one detection score, hashing
    error, write error) makes the function return silently so one bad actor
    never aborts the caller's batch.

    Args:
        name: Actor name, stored verbatim in the pickle.
        gender: Actor gender, stored verbatim in the pickle.
        url: Address of the actor's page; also the input to ``unique_id``.
        data_path: Root directory of the two-level sharded output tree.
    """
    uid = unique_id(url)
    # Outputs are sharded by the first two characters of the uid.
    shard_dir = os.path.join(data_path, uid[0], uid[1])
    os.makedirs(shard_dir, exist_ok=True)
    filepath = os.path.join(shard_dir, uid)
    if os.path.exists(filepath + '.pkl'):
        return

    try:
        page_data = yield httpclient.fetch(url, request_timeout=10)
    except HTTPError:
        return

    soup = BeautifulSoup(page_data.body, 'html.parser')
    img = soup.find('img', title=re.compile("Portrait of"))
    # find() returns None when the page has no portrait, and the tag may lack
    # a src attribute; skip the actor instead of raising.
    img_src = img.get('src') if img is not None else None
    if not img_src:
        return
    # Resolve relative image paths against the page address; absolute URLs
    # pass through urljoin unchanged.
    img_src = urljoin(url, img_src)

    try:
        image_req = yield httpclient.fetch(img_src, request_timeout=10)
    except HTTPError:
        return

    image_fd = BytesIO(image_req.body)
    try:
        image = Image.open(image_fd)
    except OSError:
        return
    image_np = np.array(image)

    try:
        rects, scores, poses = detector.run(image_np)
    except RuntimeError:
        return
    # Only keep portraits for which the detector reports exactly one score.
    if len(scores) != 1:
        return

    try:
        face_hash = yield openface.hash_face(image_np, bb=rects[0])
    except Exception:
        # Best effort: any hashing failure skips this actor. Narrower than a
        # bare except so KeyboardInterrupt/SystemExit still propagate.
        return

    data = {
        'url': url,
        'uid': uid,
        'rects': rects[0],
        'pose': poses[0],
        'score': scores[0],
        'face_hash': face_hash,
        'name': name,
        'gender': gender,
    }
    try:
        image.save(filepath + '.jpg')
        with open(filepath + '.pkl', 'wb+') as fd:
            pickle.dump(data, fd, protocol=-1)
        print(name, gender, url, uid)
    except OSError:
        return
def get(self):
    """Fetch the keys for a showtime from the ticket API and unlock the show.

    Request arguments read: ``showtime_id`` (required), ``passphrase``
    (optional) and any number of ``share`` values. A passphrase takes
    precedence; shares are forwarded only when no passphrase was given.

    A non-200 upstream response is relayed through ``self.error``; otherwise
    the result of ``rfidb.unlock_show`` is returned via ``self.api_response``.
    """
    showtime = self.get_argument('showtime_id')
    share_values = self.get_arguments('share')
    secret = self.get_argument('passphrase', None)
    api_root = CONFIG.get('ticket_api')

    # The raw request arguments could be forwarded as they are, but spelling
    # the query out documents exactly what the ticket API receives.
    query = [('showtime_id', showtime)]
    if secret:
        query.append(('passphrase', secret))
    elif share_values:
        query.extend(('share', share) for share in share_values)

    endpoint = url_concat(api_root + '/api/showtimes/keys', query)
    response = yield httpclient.fetch(endpoint)
    if response.code != 200:
        return self.error(response.code, response.body)

    payload = json.loads(response.body)['data']
    show_date = payload['date']
    rfidb = yield RFIDB.get_global()
    unlocked = yield rfidb.unlock_show(showtime, show_date, payload['users'])
    return self.api_response(unlocked)
def get(self):
    """Fetch a showtime's access tokens and schedule processing of its users.

    Request arguments read: ``showtime_id`` (required), ``passphrase``
    (optional), any number of ``share`` values, and ``reprocess``, whose mere
    presence, whatever its value, forces users that already have permissions
    to be processed again.

    For each user in the ticket API response:

    * if permissions already exist and ``reprocess`` was not requested, the
      user is reported with ``'process': False`` and skipped;
    * otherwise a ``User`` is built and ``userprocess(user)`` is scheduled on
      the current IOLoop, and the user is reported with ``'process': True``.

    A non-200 upstream response is relayed through ``self.error``; otherwise
    the list of per-user reports is returned via ``self.api_response``.
    """
    showid = self.get_argument('showtime_id')
    shares = self.get_arguments('share')
    passphrase = self.get_argument('passphrase', None)
    reprocess = self.get_argument('reprocess', None) is not None
    ticket_api = CONFIG.get('ticket_api')

    # we could also just pass the raw arguments, but this is more explicit
    # and 'self documenting'
    params = [('showtime_id', showid)]
    if passphrase:
        params += [('passphrase', passphrase)]
    elif shares:
        params += [('share', s) for s in shares]

    url = url_concat(ticket_api + '/api/showtimes/access_tokens', params)
    show_data_raw = yield httpclient.fetch(url, request_timeout=180)
    if show_data_raw.code != 200:
        return self.error(show_data_raw.code, show_data_raw.body)

    exhibitperms = yield ExhibitPermissions.get_global()
    show_data = json.loads(show_data_raw.body)
    show_date = show_data['data']['date']

    users_added = []
    for user_data in show_data['data']['users']:
        userid = user_data.pop('id')
        perms = yield exhibitperms.get_permissions(userid)
        if perms and not reprocess:
            users_added.append({
                'userid': userid,
                'permissions': perms,
                'process': False,
            })
            # Already has permissions: do not fall through, otherwise the
            # user would be processed anyway and reported a second time.
            continue

        publickey = user_data['publickey']
        privatekey = user_data.get('privatekey')
        meta = user_data.get('meta') or {}
        meta.update({
            'showid': showid,
            'permissions': perms,
            'showdate': show_date,
        })
        user = User(userid, publickey, services=user_data['services'],
                    privatekey_pem=privatekey, meta=meta)
        users_added.append({'userid': userid, 'process': True})
        # Processing runs later on the IOLoop, outside this request.
        ioloop.IOLoop.current().add_callback(partial(userprocess, user))
    return self.api_response(users_added)
def process_porndb():
    """Crawl the actor listing page by page and process every actor found.

    Generator-style coroutine. For page 1, 2, 3, ... it fetches the listing,
    collects ``(name, gender, actor_url)`` from every table row that has
    exactly seven cells, yields the list of ``process_actor`` calls for that
    page, then yields ``gen.sleep(5)`` before requesting the next page.

    Rows whose first cell has no usable link are skipped. The loop has no
    termination condition of its own; it ends only when something raises,
    such as a failing listing fetch.
    """
    base = ('http://www.adultfilmdatabase.com/browse.cfm?'
            'type=actor&page={}&imageFlag=1')
    for page in IT.count(1):
        url = base.format(page)
        print(url)
        data = yield httpclient.fetch(url)
        soup = BeautifulSoup(data.body, 'html.parser')

        actors = []
        for row in soup.findAll("tr"):
            cells = row.findAll("td")
            if len(cells) != 7:
                continue
            try:
                name = cells[0].getText().strip()
                gender = cells[1].getText().strip()
                # Named actor_url so it does not shadow the page counter.
                actor_url = urljoin(url, cells[0].find('a').attrs['href'])
            except (IndexError, AttributeError, KeyError):
                # AttributeError: no <a> in the cell; KeyError: <a> without
                # an href. Either way the row is unusable.
                continue
            actors.append((name, gender, actor_url))

        yield list(IT.starmap(process_actor, actors))
        yield gen.sleep(5)