def peer_review_and_rate(self, project_key, worker_responses, inter_task_review=False):
    """
    Performs peer review for all the worker responses and, when all ratings from peer feedback
    are received, feeds these ratings back to the platform to update worker ratings.

    :param project_key: string key for the project as shown in Daemo's Project Authoring
        Interface. It is unique for each project
    :param worker_responses: list of worker responses to the given task
    :param inter_task_review: a boolean value to control whether peer feedback should be allowed
        across workers on the same task or not. If True, it allows peer feedback for workers on
        any task they completed in the past irrespective of their similarity. If False, it only
        allows peer feedback among workers for the same task they completed
    :return: review response
    """
    log.debug(msg="initiating peer review and rating...")

    check_dependency(project_key is not None and len(project_key) > 0, Error.required("project_key"))
    check_dependency(worker_responses is not None and len(worker_responses) >= 0,
                     Error.required("worker_responses"))

    self._peer_review(
        project_key=project_key,
        worker_responses=worker_responses,
        inter_task_review=inter_task_review,
        review_completed=self._review_completed
    )
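# Illustrative usage sketch (not part of the SDK source): assuming `client` is an already
# constructed Daemo client and PROJECT_KEY comes from the Project Authoring Interface,
# peer_review_and_rate() can be handed the worker responses collected by a `completed`
# callback; the ratings are then pushed back to the platform automatically.
def rate_workers(client, PROJECT_KEY, worker_responses):
    client.peer_review_and_rate(
        project_key=PROJECT_KEY,
        worker_responses=worker_responses,
        inter_task_review=False  # restrict feedback to workers who completed the same task
    )
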
def peer_review(self, project_key, worker_responses, review_completed, inter_task_review=False):
    """
    Performs peer review for all the worker responses and, when all ratings from peer feedback
    are received, triggers the ``review_completed`` callback.

    :param project_key: string key for the project as shown in Daemo's Project Authoring
        Interface. It is unique for each project
    :param worker_responses: list of worker responses to the given task
    :param review_completed: a callback function to process all the ratings received from peer
        feedback on the worker responses
    :param inter_task_review: a boolean value to control whether peer feedback should be allowed
        across workers on the same task or not. If True, it allows peer feedback for workers on
        any task they completed in the past irrespective of their similarity. If False, it only
        allows peer feedback among workers for the same task they completed

    ::

        def review_completed(worker_responses):
            client.rate(PROJECT_KEY, worker_responses)

        client.peer_review(
            project_key=PROJECT_KEY,
            worker_responses=worker_responses,
            review_completed=review_completed
        )

    :return: review response
    """
    log.info(msg="initiating peer review...")

    check_dependency(project_key is not None and len(project_key) > 0, Error.required("project_key"))
    check_dependency(worker_responses is not None and len(worker_responses) >= 0,
                     Error.required("worker_responses"))
    check_dependency(isfunction(review_completed), Error.func_def_undefined("review_completed"))

    self._peer_review(
        project_key=project_key,
        worker_responses=worker_responses,
        inter_task_review=inter_task_review,
        review_completed=review_completed
    )
def refresh_tokens(self):
    self.load_tokens()

    data = {
        CLIENT_ID: self.client_id,
        GRANT_TYPE: REFRESH_TOKEN,
        REFRESH_TOKEN: self.refresh_token
    }

    auth_response = self.client.post(AUTH_TOKEN_URL, data=data, is_json=False, authorization=False)
    response = auth_response.json()

    if "error" in response:
        raise ServerException("auth", REFRESH_TOKEN_FAILED, 400)

    check_dependency(response[ACCESS_TOKEN] is not None and len(response[ACCESS_TOKEN]) > 0,
                     Error.required(ACCESS_TOKEN))
    check_dependency(response[REFRESH_TOKEN] is not None and len(response[REFRESH_TOKEN]) > 0,
                     Error.required(REFRESH_TOKEN))

    self.access_token = response.get(ACCESS_TOKEN)
    self.refresh_token = response.get(REFRESH_TOKEN)

    self.persist_tokens()
def _process_task(self, payload):
    taskworker_id = int(payload.get("taskworker_id", 0))
    task_id = int(payload.get("task_id", 0))
    worker_id = int(payload.get("worker_id", 0))
    project_key = payload.get("project_key", None)
    taskworker = payload.get("taskworker", None)
    task_group_id = int(payload.get("task_group_id", 0))
    expected = int(payload.get("expected", 0))

    if task_group_id == 0 and taskworker is not None:
        task_group_id = int(taskworker.get("task_group_id", 0))

    if task_group_id in self.store.tasks:
        check_dependency(taskworker_id > 0, Error.required("taskworker_id"))
        check_dependency(task_id > 0, Error.required("task_id"))
        check_dependency(task_group_id > 0, Error.required("task_group_id"))
        check_dependency(project_key is not None, Error.required("project_key"))

        batch_indices = self.store.tasks[task_group_id]["batches"]

        if taskworker is None:
            task_data = self.api_client.get_task_results_by_taskworker_id(taskworker_id)
            task_data = transform_task_results(task_data)
        else:
            task_data = transform_task_results(taskworker)

        for batch_index in batch_indices:
            check_dependency(
                batch_index < len(self.store.batches) and self.store.batches[batch_index] is not None,
                "Missing batch for task")
            check_dependency(task_data is not None, "No worker responses for task %d found" % task_id)

            config = self.store.batches[batch_index]

            task_data["accept"] = False

            approve = config["approve"]
            completed = config["completed"]
            stream = config["stream"]

            # update expected tasks
            if expected > 0:
                self.store.batches[batch_index]["expected"][task_group_id] = expected

            # increment count to track completion
            self.store.batches[batch_index]["submissions"][task_group_id] += 1

            if stream:
                self._stream_response(batch_index, task_id, task_group_id, taskworker_id,
                                      task_data, approve, completed)
            else:
                self._aggregate_responses(batch_index, task_id, task_group_id, taskworker_id,
                                          task_data, approve, completed)

        self.check_for_pending_tasks_reviews()
    else:
        log.debug("No corresponding task found. Worker response ignored.")
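# For reference only: a minimal sketch of the payload shape _process_task() reads above.
# The key names are taken from the accesses in the method; the example values and the
# meaning annotations are assumptions, not a documented platform contract.
example_payload = {
    "taskworker_id": 101,           # id of this worker's submission
    "task_id": 42,                  # id of the task the submission belongs to
    "worker_id": 7,
    "project_key": "k0BXZxVz4P3w",
    "task_group_id": 9,             # groups reposted copies of the same task
    "expected": 3,                  # submissions the batch still expects, when known
    "taskworker": None,             # inline results; when None they are fetched via the API client
}
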
def _load_tokens(self):
    with open(self.credentials_path, READ_ONLY) as infile:
        fcntl.flock(infile.fileno(), fcntl.LOCK_EX)

        data = json.load(infile)

        assert data[CLIENT_ID] is not None and len(data[CLIENT_ID]) > 0, Error.required(CLIENT_ID)
        assert data[ACCESS_TOKEN] is not None and len(data[ACCESS_TOKEN]) > 0, Error.required(ACCESS_TOKEN)
        assert data[REFRESH_TOKEN] is not None and len(data[REFRESH_TOKEN]) > 0, Error.required(REFRESH_TOKEN)

        self.client_id = data[CLIENT_ID]
        self.access_token = data[ACCESS_TOKEN]
        self.refresh_token = data[REFRESH_TOKEN]
def __init__(self):
    assert TW_CONSUMER_KEY != '', Error.required('TW_CONSUMER_KEY')
    assert TW_CONSUMER_SECRET != '', Error.required('TW_CONSUMER_SECRET')
    assert TW_ACCESS_TOKEN != '', Error.required('TW_ACCESS_TOKEN')
    assert TW_ACCESS_TOKEN_SECRET != '', Error.required('TW_ACCESS_TOKEN_SECRET')

    auth = OAuth(
        consumer_key=TW_CONSUMER_KEY,
        consumer_secret=TW_CONSUMER_SECRET,
        token=TW_ACCESS_TOKEN,
        token_secret=TW_ACCESS_TOKEN_SECRET
    )

    self.client = Twitter(auth=auth)
def load_tokens(self):
    with open(self.credentials_path, "r") as infile:
        data = json.load(infile)

        check_dependency(data[CLIENT_ID] is not None and len(data[CLIENT_ID]) > 0,
                         Error.required(CLIENT_ID))
        check_dependency(data[ACCESS_TOKEN] is not None and len(data[ACCESS_TOKEN]) > 0,
                         Error.required(ACCESS_TOKEN))
        check_dependency(data[REFRESH_TOKEN] is not None and len(data[REFRESH_TOKEN]) > 0,
                         Error.required(REFRESH_TOKEN))

        self.client_id = data[CLIENT_ID]
        self.access_token = data[ACCESS_TOKEN]
        self.refresh_token = data[REFRESH_TOKEN]
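# Illustrative only: the credentials file read by load_tokens() above is plain JSON. The key
# names below are an assumption based on the CLIENT_ID / ACCESS_TOKEN / REFRESH_TOKEN
# constants, not a confirmed schema; adjust them to whatever those constants resolve to.
import json

def write_example_credentials(path="credentials.json"):
    credentials = {
        "client_id": "<client id issued by the Daemo platform>",
        "access_token": "<oauth access token>",
        "refresh_token": "<oauth refresh token>",
    }
    with open(path, "w") as outfile:
        json.dump(credentials, outfile, indent=4)
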
def __init__(self, credentials_path='credentials.json', rerun_key=None, multi_threading=False,
             host=None, is_secure=True, is_sandbox=False, log_config=None):
    # log using default logging config if no config provided
    if log_config is None:
        logging_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logging.conf')

        with open(logging_path) as f:
            log_config = yaml.load(f)

    logging.config.dictConfig(log_config)

    log.info(msg="initializing client...")

    check_dependency(credentials_path is not None and len(credentials_path) > 0,
                     Error.required("credentials_path"))

    self.credentials_path = credentials_path
    self.rerun_key = rerun_key
    self.multi_threading = multi_threading

    self.http_proto = "http://"
    self.websock_proto = "ws://"

    if is_secure:
        self.http_proto = "https://"
        self.websock_proto = "wss://"

    self.host = PRODUCTION

    if is_sandbox:
        self.host = SANDBOX

    if host is not None:
        self.host = host

    self.pid = os.getpid()

    self.api_client = ApiClient(self.credentials_path, self.host, self.http_proto)
    self.store = Store()

    self._open_channel()
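# Minimal construction sketch. The import path and class name are assumptions (the snippet
# above only shows the constructor); adjust them to wherever this class lives in your install.
from daemo.client import DaemoClient  # assumed module path

client = DaemoClient(
    credentials_path="credentials.json",
    rerun_key=None,      # set to a previous run's key to reuse its results
    is_sandbox=True,     # talk to the sandbox host instead of production
)
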
def __init__(self, credentials_path, host=daemo.HOST, rerun_key=None, multi_threading=False):
    assert credentials_path is not None and len(credentials_path) > 0, Error.required("credentials_path")

    self.client_id = None
    self.access_token = None
    self.refresh_token = None

    self.ws_process = None
    self.projects = None
    self.batches = {}
    self.batches_in_progress = set()
    self.cache = []
    self.aggregated_data = []

    self.credentials_path = credentials_path
    self.rerun_key = rerun_key
    self.multi_threading = multi_threading
    self.host = host

    self.queue = multiprocessing.Queue()

    if self._credentials_exist():
        self._load_tokens()
    else:
        self._persist_tokens()

    self.session = requests.session()

    self._refresh_token()
    self._register_signals()
    self._monitor_messages()
    self._connect()

    if self.rerun_key is not None and len(self.rerun_key) > 0:
        self._fetch_batch_config(self.rerun_key)
def publish(self, project_key, tasks, approve, completed, mock_workers=None, stream=False):
    """
    Publishes the project if not already published and creates new tasks based on the tasks
    list provided.

    A typical usage is given below and each of the callbacks is explained further:

    ::

        client.publish(
            project_key='k0BXZxVz4P3w',
            tasks=[{
                "id": id,
                "tweet": text
            }],
            approve=approve_tweet,
            completed=post_to_twitter
        )

    :param project_key: string key for the project as shown in Daemo's Project Authoring
        Interface. It is unique for each project.
    :param tasks: list object with data for each task as key-value pairs, where each key is
        used in Daemo's Project Authoring Interface as a replaceable value.

        A typical tasks list object is given below, which passes an id and tweet text as input
        for each task. Remember these keys -- id, tweet -- have been used while creating task
        fields in Daemo's task authoring interface.

        ::

            tasks=[{
                "id": id,
                "tweet": text
            }]

    :param approve: a callback function which processes worker responses to produce boolean
        values indicating whether each worker response should be accepted and thus paid, or not.

        A typical approve callback function is given below, which checks that the tweet text in
        each worker response is not empty.

        ::

            def approve_tweet(worker_responses):
                approvals = [len(get_tweet_text(response)) > 0 for response in worker_responses]
                return approvals

    :param completed: a callback function similar to the approve callback, but which processes
        only the approved worker responses. It doesn't return any value.

        A typical completed callback function is given below, which posts all the approved
        worker responses to Twitter.

        ::

            def post_to_twitter(worker_responses):
                for worker_response in worker_responses:
                    twitter.post(worker_response)

    :param mock_workers: a callback function which simulates workers submitting responses to
        different tasks.

        A typical mock_workers callback function is given below, which provides some tweet text
        on behalf of *count* workers.

        ::

            def mock_workers(task, count):
                results = [
                    [{
                        "name": "tweet",
                        "value": "%d. Trump Trump everywhere not a Hillary to see." % num
                    }] for num in range(count)]
                return results

    :param stream: a boolean value which controls whether each worker response is delivered as
        soon as that worker submits, or only after all of them have completed.
    """
    log.info(msg="publishing project...")

    check_dependency(project_key is not None and len(project_key) > 0, Error.required("project_key"))
    check_dependency(tasks is not None and len(tasks) >= 0, Error.required("tasks"))
    check_dependency(isfunction(approve), Error.func_def_undefined("approve"))
    check_dependency(isfunction(completed), Error.func_def_undefined("completed"))

    if mock_workers is not None:
        check_dependency(isfunction(mock_workers), Error.func_def_undefined("mock_workers"))

    thread = callback_thread(
        name='publish',
        target=self._publish,
        kwargs=dict(
            project_key=project_key,
            tasks=tasks,
            approve=approve,
            completed=completed,
            stream=stream,
            mock_workers=mock_workers,
            rerun_key=self.rerun_key
        )
    )
    thread.start()
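# End-to-end sketch assembling the callbacks from the docstring above into one publish() call.
# `run_example` takes an already constructed client; the tweet-extraction helper is a stand-in
# and assumes responses expose their fields as {"name": ..., "value": ...} pairs (an assumption
# based on the mock_workers example, not a documented schema).
def get_tweet_text(response):
    fields = response.get("fields", [])
    return next((f.get("value", "") for f in fields if f.get("name") == "tweet"), "")

def approve_tweet(worker_responses):
    return [len(get_tweet_text(response)) > 0 for response in worker_responses]

def post_to_twitter(worker_responses):
    for worker_response in worker_responses:
        print("approved tweet:", get_tweet_text(worker_response))

def run_example(client):
    client.publish(
        project_key='k0BXZxVz4P3w',
        tasks=[{"id": 1, "tweet": "hello world"}],
        approve=approve_tweet,
        completed=post_to_twitter,
        stream=True,
    )
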
def onOpen(self):
    log.info("channel opened")

    assert hasattr(self.factory, "queue") and self.factory.queue is not None, \
        Error.required("queue")
def _processMessage(self, payload, isBinary):
    if not isBinary:
        response = json.loads(payload.decode("utf8"))

        taskworker_id = int(response.get("taskworker_id", 0))
        task_id = int(response.get("task_id", 0))
        project_key = response.get("project_hash_id", None)
        batch = response.get("batch", None)

        # ignore data pushed via GUI (has no batch info)
        if batch is not None:
            assert taskworker_id > 0, Error.required("taskworker_id")
            assert task_id > 0, Error.required("task_id")
            assert project_key is not None, Error.required("project_hash_id")

            task_configs = self._get_task_map(project_key, task_id, batch["id"])

            if task_configs is not None and len(task_configs) > 0:
                task_data = self._get_task_results_by_taskworker_id(taskworker_id)

                if task_data is not None:
                    task_data["accept"] = False

                    for config in task_configs:
                        approve = config["approve"]
                        completed = config["completed"]
                        stream = config["stream"]
                        aggregation_id = config["aggregation_id"]

                        if stream:
                            if approve([task_data]):
                                task_data["accept"] = True

                            task_status = self._update_status(task_data)
                            task_status.raise_for_status()

                            if task_data["accept"]:
                                completed([task_data])

                            is_done = self._fetch_batch_status(project_key, aggregation_id)

                            if is_done:
                                # remove it from global list of projects
                                self._remove_batch(aggregation_id)
                        else:
                            # store it for aggregation (stream = False)
                            self._aggregate(project_key, task_id, aggregation_id, task_data)

                            is_done = self._fetch_batch_status(project_key, aggregation_id)

                            if is_done:
                                tasks_data = self._get_aggregated(aggregation_id)

                                approvals = approve(tasks_data)

                                for approval in approvals:
                                    task_data["accept"] = approval

                                    task_status = self._update_status(task_data)
                                    task_status.raise_for_status()

                                approved_tasks = [x[0] for x in zip(tasks_data, approvals) if x[1]]

                                completed(approved_tasks)

                                self._remove_batch(aggregation_id)

        if self._all_batches_complete():
            self._stop()
def publish(self, project_key, tasks, approve, completed, mock_workers=None, stream=False):
    """
    Publishes the project if not already published and creates new tasks based on the tasks
    list provided.

    A typical usage is given below and each of the callbacks is explained further:

    ::

        daemo.publish(
            project_key='k0BXZxVz4P3w',
            tasks=[{
                "id": id,
                "tweet": text
            }],
            approve=approve_tweet,
            completed=post_to_twitter
        )

    :param project_key: string key for the project as shown in Daemo's Project Authoring
        Interface. It is unique for each project.
    :param tasks: list object with data for each task as key-value pairs, where each key is
        used in Daemo's Project Authoring Interface as a replaceable value.

        A typical tasks list object is given below, which passes an id and tweet text as input
        for each task. Remember these keys -- id, tweet -- have been used while creating task
        fields in Daemo's task authoring interface.

        ::

            tasks=[{
                "id": id,
                "tweet": text
            }]

    :param approve: a callback function which processes worker responses to produce boolean
        values indicating whether each worker response should be accepted and thus paid, or not.

        A typical approve callback function is given below, which checks that the tweet text in
        each worker response is not empty.

        ::

            def approve_tweet(worker_responses):
                approvals = [len(get_tweet_text(response)) > 0 for response in worker_responses]
                return approvals

    :param completed: a callback function similar to the approve callback, but which processes
        only the approved worker responses. It doesn't return any value.

        A typical completed callback function is given below, which posts all the approved
        worker responses to Twitter.

        ::

            def post_to_twitter(worker_responses):
                for worker_response in worker_responses:
                    twitter.post(worker_response)

    :param mock_workers: a callback function which simulates workers submitting responses to
        different tasks.

        A typical mock_workers callback function is given below, which provides some tweet text
        on behalf of *count* workers.

        ::

            def mock_workers(task, count):
                results = [
                    [{
                        "name": "tweet",
                        "value": "%d. Trump Trump everywhere not a Hillary to see." % num
                    }] for num in range(count)]
                return results

    :param stream: a boolean value which controls whether each worker response is delivered as
        soon as that worker submits, or only after all of them have completed.
    """
    assert project_key is not None and len(project_key) > 0, Error.required("project_key")
    assert tasks is not None and len(tasks) >= 0, Error.required("tasks")
    assert isfunction(approve), Error.func_def_undefined(APPROVE)
    assert isfunction(completed), Error.func_def_undefined(CALLBACK)

    if mock_workers is not None:
        assert isfunction(mock_workers), Error.func_def_undefined("mock_workers")

    thread = threading.Thread(
        target=self._publish,
        kwargs=dict(
            project_key=project_key,
            tasks=tasks,
            approve=approve,
            completed=completed,
            stream=stream,
            mock_workers=mock_workers,
            rerun_key=self.rerun_key
        )
    )
    thread.start()
def onOpen(self):
    logging.debug("### channel opened ###")

    assert hasattr(self.factory, "queue") and self.factory.queue is not None, \
        Error.required("queue")