Esempio n. 1
0
 def __init__(self, model_name, validate=False):
     """Initialise the base client with the logo server's host and port.

     Args:
         model_name: name of the model this client will use; stored on
             ``self.model_name``.
         validate: when true, assert that ``model_name`` is among the
             names returned by ``self.list_models()``.
     """
     server_host = config.get('logo_server.host')
     server_port = config.get('logo_server.port')
     super(LogoClient, self).__init__(server_host, server_port)

     if validate:
         # The message is built up front, before the server is queried.
         error_msg = "{} not in list of available models".format(model_name)
         available_models = self.list_models()
         assert model_name in available_models, error_msg

     self.model_name = model_name
class CeleryConfig(BaseConfig):
    """Celery settings using one Redis host as broker and result backend."""

    # First host returned for the configured cache cluster; the lookup is
    # made with sleep_time=0 and num_tries=0.
    REDIS = redis_hosts(
        config.get('celery.cache_cluster'), sleep_time=0, num_tries=0)[0]
    PORT = config.get('celery.cache_port')

    # Broker and result backend share the same Redis URL.
    BROKER_URL = 'redis://guest@{}:{}'.format(REDIS, PORT)
    CELERY_RESULT_BACKEND = BROKER_URL

    CELERY_ALWAYS_EAGER = False
Esempio n. 3
0
 def _query_server(cls, args_dict, timeout=180.0):
     """Send ``args_dict`` to the LDA server as one JSON line; return the reply.

     Args:
         args_dict: JSON-serialisable dict; it is sent followed by a
             newline character.
         timeout: socket timeout in seconds, set on the socket before
             connecting.

     Returns:
         Whatever a single ``recv(1024)`` call yields, i.e. at most 1024
         bytes of the server's answer.

     Raises:
         Any socket error raised while connecting, sending or receiving
         is propagated to the caller; the socket is closed first.
     """
     mgr_hostname = config.get('lda_server.host')
     mgr_port = config.get('lda_server.port')
     clientsocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     # Everything after socket creation lives inside the try block so that
     # a failed settimeout/connect does not leak the socket.
     try:
         clientsocket.settimeout(timeout)
         clientsocket.connect((mgr_hostname, mgr_port))
         clientsocket.sendall(json.dumps(args_dict) + '\n')
         return clientsocket.recv(1024)
     finally:
         clientsocket.close()
Esempio n. 4
0
    def __init__(self, model_id, validate_model_id=True):
        """
        Initialise the base client with the url server's host and port.

        Args:
            model_id: id from ClassifierModel table; stored on
                ``self._model_id``.
            validate_model_id: when true, ``model_id`` is passed to
                ``self._check_model_id`` once the base client is set up.
        """
        server_host = config.get('url_server.host')
        server_port = config.get('url_server.port')
        super(UrlClient, self).__init__(server_host, server_port)

        self._model_id = model_id
        if not validate_model_id:
            return
        self._check_model_id(model_id)
Esempio n. 5
0
    def submit_hits(self, data):
        """
        Submit mturk hits built from ``data`` using this job's evaluator.

        The evaluator is looked up by ``self.label_id``. Hits are only
        submitted when the configured ``mturk_hostname`` contains
        'sandbox'; otherwise nothing is submitted and ``(None, 0)`` is
        returned.
        NOTE(review): confirm that skipping submission outside the
        sandbox is intended.

        Args:
            data:
              if the evaluator is a ClickableImageEvaluator, a list of
                lists [video-id,image-timestamp] (handed to
                _submit_image_hits)
              otherwise, a list of video ids (handed to _submit_vc_hits)
        Returns:
            hit_type, number of hits submitted
        """
        evaluator = self.evaluator_type.query.filter_by(
            target_label_id=self.label_id).one()

        if 'sandbox' not in config.get("mturk_hostname"):
            return None, 0

        evaluator.min_hits_approved = 0
        session.flush()

        if type(evaluator) == ClickableImageEvaluator:
            assert (len(data) > 0 and len(data[0]) == 2), \
                "If we use ClickableImageEvaluator hits, data needs to be a list of lists [video-id,image-timestamp]"
            submit = self._submit_image_hits
        else:
            assert (len(data) > 0 and type(data[0]) is not list), \
                "If we use VideoCollageEvaluator hits, data needs to be a list of video ids"
            submit = self._submit_vc_hits

        hit_type, num_hits_submitted = submit(evaluator, data)
        return hit_type, num_hits_submitted
Esempio n. 6
0
    def __init__(self,
                 label_id,
                 evaluator_type=ClickableImageEvaluator,
                 **kwargs):
        """
        Set up the job for ``label_id`` and make sure an evaluator exists.

        Args:
            label_id: id of the label the evaluator targets.
            evaluator_type: evaluator class; must be one of
                EVALUATOR_TYPES_SUPPORTED.
            **kwargs: forwarded to ``evaluator_type`` only when a new
                evaluator has to be created.
        """
        super(MTurkImageJob, self).__init__()

        assert (evaluator_type in EVALUATOR_TYPES_SUPPORTED), \
            "evaluator_type has to be in %s" % str(EVALUATOR_TYPES_SUPPORTED)

        self.label_id = label_id
        self.finished = False
        self.evaluator_type = evaluator_type
        self.hit = evaluator_type.hit_type

        existing = evaluator_type.query.filter_by(target_label_id=label_id)
        if not existing.count():
            # No evaluator for this label yet: create one named after it.
            label_name = Label.get(label_id).name
            evaluator = evaluator_type(
                name="MTurk Image Evaluator for %s" % label_name,
                target_label_id=label_id,
                **kwargs)
            session.flush()
        else:
            evaluator = existing.one()

        # only for stage and dev
        if 'sandbox' in config.get("mturk_hostname"):
            evaluator.min_hits_approved = 0
            session.flush()
 def __init__(self):
     """Create ``self.yt_service`` via ``build`` for API "youtube", version "v3".

     The developer key is read from the 'youtube.developer_key' setting.
     """
     developer_key = config.get('youtube.developer_key')
     self.yt_service = build("youtube", "v3", developerKey=developer_key)
Esempio n. 8
0
def configure_app(app):
    """Load Celery settings into ``app``.

    Args:
        app: object exposing ``config_from_object``; it receives
            CeleryTestConfig when the 'celery.test' setting is truthy and
            CeleryConfig otherwise.
    """
    # The settings modules are imported lazily so only the one that is
    # actually needed gets loaded.
    if config.get('celery.test'):
        from config.celerytestconfig import CeleryTestConfig as settings
    else:
        from config.celeryconfig import CeleryConfig as settings
    app.config_from_object(settings)
def forward_updated_pages(pages_to_forward):
    """Write the given page ids to the page-label-updates SQS queue.

    Args:
        pages_to_forward: sized iterable of page ids; it is sent as a
            list together with the current Unix time in whole seconds.
    """
    logger.info('Forwarding %s pages' % len(pages_to_forward))

    payload = dict(
        page_ids=list(pages_to_forward),
        timestamp=int(time.time()),
    )
    target_queue = sqs.get_queue(config.get('sqs.page_label_updates_queue'))
    sqs.write_to_queue(target_queue, payload)
Esempio n. 10
0
 def set_values(self, **kwargs):
     """Call ``func`` with mock settings applied when ``self.mock_evaluator``.

     When ``self.mock_evaluator`` is truthy, the evaluator's thresholds are
     overwritten, the session is flushed and the 'affine.s3.bucket' setting
     is switched to 'affine' before ``func`` runs. The setting is put back
     to its previous value afterwards, even if ``func`` raises.

     Args:
         **kwargs: forwarded unchanged to ``func``.

     Returns:
         Whatever ``func(self, **kwargs)`` returns.
     """
     saved_bucket = config.get('affine.s3.bucket')
     try:
         if self.mock_evaluator:
             mock_settings = (
                 ('min_percent_approved', 0),
                 ('max_assignments', 1),
                 ('min_hits_approved', 0),
                 ('match_threshold', 1),
                 ('require_adult', False),
             )
             for attr_name, attr_value in mock_settings:
                 setattr(self, attr_name, attr_value)
             session.flush()
             config.set('affine.s3.bucket', 'affine')
         return func(self, **kwargs)
     finally:
         config.set('affine.s3.bucket', saved_bucket)
Esempio n. 11
0
    def load_model(cls, model_dir):
        """
        Build an instance of ``cls`` from an existing model directory.

        Args:
            model_dir: dir string with model; must exist.

        Returns:
            ``cls(model_dir, MAT_FILE, DEPLOY, ...)``; ``mean_file`` is
            passed only when a MEAN_IMAGE file is present in ``model_dir``.
        """
        has_gpu = config.get('has_gpu')
        assert os.path.exists(model_dir)

        options = {}
        mean_image_path = os.path.join(model_dir, MEAN_IMAGE)
        if os.path.isfile(mean_image_path):
            options['mean_file'] = MEAN_IMAGE
        options['with_gpu'] = has_gpu
        return cls(model_dir, MAT_FILE, DEPLOY, **options)
Esempio n. 12
0
def spotlight_annotate(text, confidence=0.5, support=20, timeout=30):
    """
    Annotate ``text`` by posting it to the spotlight server.

    Args:
        text: The text that should be annotated.
        confidence, support: Internal spotlight parameters, sent as-is.
        timeout: Timeout passed to the HTTP request.

    Returns:
        A list with one cleaned entry (via ``_clean_dic``) per item of the
        response's 'Resources'. The list is empty when the preprocessed
        text is blank or the response has no 'Resources' key.

    Raises:
        Exception: The response body decoded to None.
        Whatever ``response.raise_for_status()`` raises for a failed
        request.
    """
    # some unicode characters are problematic
    text = _preprocess_text(text)

    # an empty text would make the server answer with a 400 error
    if text.strip() == '':
        return []

    endpoint = config.get('spotlight_server.address') + "/annotate/"
    response = requests.post(
        endpoint,
        data={'confidence': confidence, 'support': support, 'text': text},
        headers={'accept': 'application/json'},
        timeout=timeout)

    if response.status_code != requests.codes.ok:
        response.raise_for_status()

    results = response.json()
    if results is None:
        raise Exception("There Response does not contain proper JSON")

    resources = results['Resources'] if 'Resources' in results else []
    return [_clean_dic(resource) for resource in resources]
    def load_model(cls, model_dir):
        """
        Build an instance of ``cls`` from an existing model directory.

        Args:
            model_dir: path of the directory holding the model; must exist.

        Returns:
            ``cls(model_dir, CAFFE_MODEL, DEPLOY, ...)``; ``mean_file`` is
            passed only when a MEAN_IMAGE file is present in ``model_dir``.
        """
        has_gpu = config.get('has_gpu')
        assert os.path.exists(model_dir)

        options = {}
        mean_image_path = os.path.join(model_dir, MEAN_IMAGE)
        if os.path.isfile(mean_image_path):
            options['mean_file'] = MEAN_IMAGE
        options['with_gpu'] = has_gpu
        return cls(model_dir, CAFFE_MODEL, DEPLOY, **options)
Esempio n. 14
0
 def format_data(self, box_ids):
     """Build the template payload for the given boxes.

     The ideal number of box ids is 18, but the template will not break if
     there are more or fewer.

     Args:
         box_ids: iterable of ids accepted by ``Box.get``.

     Returns:
         A dict with the keys 'question', 'image_bucket' and 'data'.
         ``data['videos']`` maps each box id (int) to a dict holding
         'video_id' (int) and 'thumbnail', a list of ints
         ``[timestamp, x, y, width, height]``. ``data['reference_image']``
         is '' when ``self.reference_image_url`` is None.
     """
     boxes = [Box.get(box_id) for box_id in box_ids]

     videos = {}
     for box in boxes:
         thumbnail_fields = (box.timestamp, box.x, box.y, box.width,
                             box.height)
         videos[int(box.id)] = {
             'video_id': int(box.video_id),
             # An explicit list rather than map(): on Python 3 map() gives
             # a lazy one-shot iterator, which cannot be serialised or
             # read twice.
             'thumbnail': [int(value) for value in thumbnail_fields],
         }

     reference_image = self.reference_image_url
     if reference_image is None:
         reference_image = ''

     return {
         'question': self.question,
         'image_bucket': config.get('affine.s3.bucket'),
         'data': {
             "evaluator_id": str(self.id),
             "videos": videos,
             "reference_image": reference_image,
         },
     }
    def submit_hits(cls, evaluator_id, image_folder_path, job_id):
        """ Upload a folder of images to S3 and create On-demand HITs for it.
            The evaluator is flagged as mocked when the configured
            mturk_hostname contains 'sandbox'.

            Args:
                evaluator_id: evaluator to be used for the job
                image_folder_path: path to local directory with images to be QA'ed
                job_id: name to uniquely identify the job (e.g. logo-prod_2015-01-01)
        """
        evaluator = MechanicalTurkEvaluator.get(evaluator_id)
        if 'sandbox' in config.get("mturk_hostname"):
            evaluator.mock_evaluator = True

        ev_type = evaluator.evaluator_type
        assert ev_type in EVALUATOR_FOLDER_TYPE, \
            "The selected evaluator is not currently supported"

        logger.info('Uploading to S3...')
        s3_urls = cls._upload_to_s3(image_folder_path, ev_type, job_id)

        logger.info('Creating HITs...')
        # Pick the submitter that matches the evaluator type; any other
        # type is silently left without HITs.
        if ev_type == 'images':
            submit = cls._submit_image_hits
        elif ev_type == 'page_text':
            submit = cls._submit_screenshot_hits
        elif ev_type == 'videos':
            submit = cls._submit_collage_hits
        else:
            return
        submit(evaluator, s3_urls, job_id)
Esempio n. 16
0
 def get_s3_bucket(cls, is_on_demand):
     """Return the configured S3 bucket for on-demand or regular HITs.

     Args:
         is_on_demand: truthy selects the 'affine.s3.on_demand_qa_bucket'
             setting, falsy the 'affine.s3.bucket' setting.
     """
     if is_on_demand:
         return config.get('affine.s3.on_demand_qa_bucket')
     return config.get('affine.s3.bucket')
Esempio n. 17
0
 def _queue_name(self):
     """Fill ``_QUEUE_NAME_FMT`` with the configured env and this object's id."""
     substitutions = {'env': config.get('env'), 'id': self.id}
     return self._QUEUE_NAME_FMT % substitutions
Esempio n. 18
0
 def mock_evaluator(self, value):
     """Store the mock flag; only allowed against a sandbox mturk host.

     Args:
         value: must be a bool.

     Raises:
         AssertionError: the configured 'mturk_hostname' does not contain
             'sandbox', or ``value`` is not a bool.
     """
     assert 'sandbox' in config.get('mturk_hostname'), \
         "Cannot mock evaluator in prod"
     assert isinstance(value, bool), "Input value should be boolean"
     self.__mock_evaluator = value
Esempio n. 19
0
 def __init__(self):
     """Initialise the base client with the configured face-signature table name."""
     super(DynamoFaceSignatureClient, self).__init__(
         config.get('dynamo.face_signature_table_name'))