def __init__(self, model_name, validate=False):
    host, port = (config.get('logo_server.host'),
                  config.get('logo_server.port'))
    super(LogoClient, self).__init__(host, port)
    if validate:
        error_msg = "{} not in list of available models".format(model_name)
        assert model_name in self.list_models(), error_msg
    self.model_name = model_name
class CeleryConfig(BaseConfig):
    REDIS = redis_hosts(config.get('celery.cache_cluster'),
                        sleep_time=0, num_tries=0)[0]
    PORT = config.get('celery.cache_port')
    BROKER_URL = 'redis://guest@{}:{}'.format(REDIS, PORT)
    CELERY_RESULT_BACKEND = 'redis://guest@{}:{}'.format(REDIS, PORT)
    CELERY_ALWAYS_EAGER = False
def _query_server(cls, args_dict, timeout=180.0):
    mgr_hostname = config.get('lda_server.host')
    mgr_port = config.get('lda_server.port')
    clientsocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    clientsocket.settimeout(timeout)
    clientsocket.connect((mgr_hostname, mgr_port))
    try:
        # the server speaks newline-delimited JSON over TCP
        clientsocket.sendall(json.dumps(args_dict) + '\n')
        return clientsocket.recv(1024)
    finally:
        clientsocket.close()
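
# Usage sketch (assumptions: "LdaClient" is a hypothetical name for the class
# that owns _query_server, and the 'action'/'doc_id' keys are illustrative;
# the reply is assumed to be a single JSON payload under 1024 bytes).
reply = LdaClient._query_server({'action': 'topics', 'doc_id': 42}, timeout=30.0)
topics = json.loads(reply)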
def __init__(self, model_id, validate_model_id=True):
    """
    Establish a connection with the server.

    Args:
        model_id: id from the ClassifierModel table.
        validate_model_id: if True, validate model_id via _check_model_id().
    """
    host, port = (config.get('url_server.host'),
                  config.get('url_server.port'))
    super(UrlClient, self).__init__(host, port)
    self._model_id = model_id
    if validate_model_id:
        self._check_model_id(model_id)
def submit_hits(self, data):
    """
    Submit MTurk HITs with the input data.

    Args:
        data: if self.evaluator_type is ClickableImageEvaluator, the data
            passed to create_hit (via _submit_image_hits) must be a list of
            [video-id, image-timestamp] pairs.
            If self.evaluator_type is VideoCollageEvaluator, the data passed
            to create_hit (via _submit_vc_hits) must be a list of video ids.

    Returns:
        hit_type, number of hits submitted
    """
    evaluator = self.evaluator_type.query.filter_by(
        target_label_id=self.label_id).one()
    num_hits_submitted = 0
    hit_type = None
    if 'sandbox' in config.get("mturk_hostname"):
        evaluator.min_hits_approved = 0
        session.flush()
    if type(evaluator) == ClickableImageEvaluator:
        assert (len(data) > 0 and len(data[0]) == 2), \
            "For ClickableImageEvaluator HITs, data must be a list of [video-id, image-timestamp] pairs"
        hit_type, num_hits_submitted = self._submit_image_hits(
            evaluator, data)
    else:
        assert (len(data) > 0 and type(data[0]) is not list), \
            "For VideoCollageEvaluator HITs, data must be a list of video ids"
        hit_type, num_hits_submitted = self._submit_vc_hits(
            evaluator, data)
    return hit_type, num_hits_submitted
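
# Usage sketch (assumptions: label id 123 and the video ids/timestamps are
# illustrative only, and VideoCollageEvaluator is in EVALUATOR_TYPES_SUPPORTED).
job = MTurkImageJob(123, evaluator_type=ClickableImageEvaluator)
hit_type, n = job.submit_hits([[1001, 15000], [1002, 30000]])  # [video-id, image-timestamp] pairs

collage_job = MTurkImageJob(123, evaluator_type=VideoCollageEvaluator)
hit_type, n = collage_job.submit_hits([1001, 1002, 1003])  # plain list of video ids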
def __init__(self, label_id, evaluator_type=ClickableImageEvaluator, **kwargs):
    super(MTurkImageJob, self).__init__()
    assert (evaluator_type in EVALUATOR_TYPES_SUPPORTED), \
        "evaluator_type has to be in %s" % str(EVALUATOR_TYPES_SUPPORTED)
    self.label_id = label_id
    self.finished = False
    self.evaluator_type = evaluator_type
    self.hit = evaluator_type.hit_type
    if evaluator_type.query.filter_by(target_label_id=label_id).count():
        evaluator = evaluator_type.query.filter_by(
            target_label_id=label_id).one()
    else:
        evaluator_name = "MTurk Image Evaluator for %s" % Label.get(
            label_id).name
        evaluator = evaluator_type(name=evaluator_name,
                                   target_label_id=label_id, **kwargs)
        session.flush()
    # only for stage and dev
    if 'sandbox' in config.get("mturk_hostname"):
        evaluator.min_hits_approved = 0
        session.flush()
def __init__(self):
    DEVELOPER_KEY = config.get('youtube.developer_key')
    YOUTUBE_API_SERVICE_NAME = "youtube"
    YOUTUBE_API_VERSION = "v3"
    self.yt_service = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                            developerKey=DEVELOPER_KEY)
def configure_app(app):
    test = config.get('celery.test')
    if test:
        from config.celerytestconfig import CeleryTestConfig
        app.config_from_object(CeleryTestConfig)
    else:
        from config.celeryconfig import CeleryConfig
        app.config_from_object(CeleryConfig)
def forward_updated_pages(pages_to_forward):
    logger.info('Forwarding %s pages' % len(pages_to_forward))
    data = {
        'page_ids': list(pages_to_forward),
        'timestamp': int(time.time()),
    }
    queue_name = config.get('sqs.page_label_updates_queue')
    queue = sqs.get_queue(queue_name)
    sqs.write_to_queue(queue, data)
def set_values(self, **kwargs):
    bucket = config.get('affine.s3.bucket')
    try:
        if self.mock_evaluator:
            self.min_percent_approved = 0
            self.max_assignments = 1
            self.min_hits_approved = 0
            self.match_threshold = 1
            self.require_adult = False
            session.flush()
            config.set('affine.s3.bucket', 'affine')
        f = func(self, **kwargs)
        return f
    finally:
        # always restore the original bucket setting
        config.set('affine.s3.bucket', bucket)
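
# Context sketch (assumption, not from the source): set_values closes over a
# free variable `func`, so it reads like the inner wrapper of a decorator.
# The enclosing factory would look roughly like this hypothetical outline:
def mock_aware(func):
    def set_values(self, **kwargs):
        ...  # body as defined above
    return set_values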
def load_model(cls, model_dir):
    """
    Return an instance of WordRecProcessor loaded from a local model directory.

    Args:
        model_dir: path to the directory containing the model files.

    Returns:
        WordRecProcessor instance
    """
    has_gpu = config.get('has_gpu')
    assert os.path.exists(model_dir)
    if os.path.isfile(os.path.join(model_dir, MEAN_IMAGE)):
        return cls(model_dir, MAT_FILE, DEPLOY, mean_file=MEAN_IMAGE,
                   with_gpu=has_gpu)
    else:
        return cls(model_dir, MAT_FILE, DEPLOY, with_gpu=has_gpu)
def spotlight_annotate(text, confidence=0.5, support=20, timeout=30):
    """
    Annotate the text using Spotlight.

    Args:
        text: The text that should be annotated.
        confidence, support: Internal Spotlight parameters.
        timeout: Wait time before the server is considered timed out.

    Returns:
        A list of dicts containing the results of the classification.

    Raises:
        Exception: The server did not respond with proper JSON.
    """
    # some unicode characters are problematic
    text = _preprocess_text(text)
    # '' returns a 400 error
    if text.strip() == '':
        return []
    spotlight_address = config.get('spotlight_server.address') + "/annotate/"
    data = {'confidence': confidence, 'support': support, 'text': text}
    headers = {'accept': 'application/json'}
    response = requests.post(spotlight_address, data=data, headers=headers,
                             timeout=timeout)
    if response.status_code != requests.codes.ok:
        response.raise_for_status()
    results = response.json()
    if results is None:
        raise Exception("The response does not contain proper JSON")
    if 'Resources' not in results:
        return []
    return [_clean_dic(result) for result in results['Resources']]
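
# Usage sketch (the sentence is illustrative; the exact keys in each returned
# dict depend on _clean_dic and the configured spotlight_server.address).
annotations = spotlight_annotate("Berlin is the capital of Germany",
                                 confidence=0.4, support=20)
for entity in annotations:
    print(entity)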
def load_model(cls, model_dir):
    """
    Return an instance of CaffeProcessor for a model that has already been
    downloaded and extracted to a local directory.

    Args:
        model_dir: directory containing the model files.

    Returns:
        CaffeProcessor instance
    """
    has_gpu = config.get('has_gpu')
    assert os.path.exists(model_dir)
    if os.path.isfile(os.path.join(model_dir, MEAN_IMAGE)):
        return cls(model_dir, CAFFE_MODEL, DEPLOY, mean_file=MEAN_IMAGE,
                   with_gpu=has_gpu)
    else:
        return cls(model_dir, CAFFE_MODEL, DEPLOY, with_gpu=has_gpu)
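
# Usage sketch (assumptions: load_model is exposed as a classmethod and
# '/tmp/cnn_model_42' is a hypothetical directory holding the model files).
processor = CaffeProcessor.load_model('/tmp/cnn_model_42')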
def format_data(self, box_ids):
    """The ideal number of box ids is 18, but the template will not break
    if there are more or fewer."""
    boxes = [Box.get(box_id) for box_id in box_ids]
    videos = defaultdict(dict)
    for box in boxes:
        video_id = int(box.video_id)
        box_id = int(box.id)
        videos[box_id]['video_id'] = video_id
        videos[box_id]['thumbnail'] = map(
            int, [box.timestamp, box.x, box.y, box.width, box.height])
    data = {
        'question': self.question,
        'image_bucket': config.get('affine.s3.bucket'),
    }
    data['data'] = {
        "evaluator_id": str(self.id),
        "videos": dict(videos),
        "reference_image": self.reference_image_url
        if self.reference_image_url is not None else '',
    }
    return data
def submit_hits(cls, evaluator_id, image_folder_path, job_id):
    """
    Submit on-demand HITs to MTurk. The evaluator is mocked when not
    running in prod.

    Args:
        evaluator_id: evaluator to be used for the job
        image_folder_path: path to a local directory with images to be QA'ed
        job_id: name that uniquely identifies the job
            (e.g. logo-prod_2015-01-01)
    """
    evaluator = MechanicalTurkEvaluator.get(evaluator_id)
    if 'sandbox' in config.get("mturk_hostname"):
        evaluator.mock_evaluator = True
    ev_type = evaluator.evaluator_type
    msg = "The selected evaluator is not currently supported"
    assert ev_type in EVALUATOR_FOLDER_TYPE, msg
    logger.info('Uploading to S3...')
    s3_urls = cls._upload_to_s3(image_folder_path, ev_type, job_id)
    logger.info('Creating HITs...')
    if ev_type == 'images':
        cls._submit_image_hits(evaluator, s3_urls, job_id)
    elif ev_type == 'page_text':
        cls._submit_screenshot_hits(evaluator, s3_urls, job_id)
    elif ev_type == 'videos':
        cls._submit_collage_hits(evaluator, s3_urls, job_id)
def get_s3_bucket(cls, is_on_demand):
    """Return the correct S3 bucket depending on whether this is an
    on-demand or a regular HIT."""
    return (config.get('affine.s3.on_demand_qa_bucket')
            if is_on_demand else config.get('affine.s3.bucket'))
def _queue_name(self):
    return self._QUEUE_NAME_FMT % dict(env=config.get('env'), id=self.id)
def mock_evaluator(self, value):
    message = "Cannot mock evaluator in prod"
    assert 'sandbox' in config.get('mturk_hostname'), message
    message = "Input value should be boolean"
    assert isinstance(value, bool), message
    self.__mock_evaluator = value
def __init__(self):
    table_name = config.get('dynamo.face_signature_table_name')
    super(DynamoFaceSignatureClient, self).__init__(table_name)