def has_worker_submitted(con, job_id, worker_id, treatment=None):
    """ Return True if the worker's submission was found """
    base = "prop"
    # Probe the data table first, then the result table; a hit in either
    # one means the worker already submitted.
    table_data = get_table(base, job_id=job_id, schema="data", treatment=treatment)
    table_result = get_table(base, job_id=job_id, schema="result", treatment=treatment)
    probes = [
        (
            table_data,
            f"select * from {table_data} where (job_id=? or job_id like '{REF_JOB_ID_PREFIX}%') and (resp_worker_id=?)",
            (job_id, worker_id),
        ),
        (
            table_result,
            f"select * from {table_result} where (job_id=? or job_id like '{REF_JOB_ID_PREFIX}%') and (prop_worker_id=? or resp_worker_id=?)",
            (job_id, worker_id, worker_id),
        ),
    ]
    for table, query, params in probes:
        if table_exists(con, table) and con.execute(query, params).fetchone():
            return True
    return False
def get_features(job_id, resp_worker_id, treatment, tasks=None, tasks_features=None):
    """ Collect the responder's feature row from the task/resp result tables.

    :returns: (numpy.array) features
    :returns: (dict) features_rows untransformed
    """
    app.logger.debug("get_features")
    MODEL_INFOS_KEY = f"{TREATMENTS_MODEL_REFS[treatment.upper()]}_MODEL_INFOS"
    if tasks is None:
        tasks = app.config["TASKS"]
    if tasks_features is None:
        tasks_features = app.config["TASKS_FEATURES"]
    con = get_db("RESULT")
    row_features = {}

    def _pull(table, features):
        # task tables are shared. when using a REF, the table may exist but
        # without valid rows for this worker.
        if table_exists(con, table):
            with con:
                sql = f"SELECT {','.join(features)} FROM {table} WHERE worker_id=?"
                found = con.execute(sql, (resp_worker_id, )).fetchone()
            if found is not None:
                row_features.update(dict(found))

    for task_name, features in tasks_features.items():
        if task_name in tasks:
            _pull(
                get_table(task_name, job_id=job_id, schema="result", is_task=True),
                features)

    resp_features = {"resp": ["resp_time_spent"]}
    for name, features in resp_features.items():
        _pull(
            get_table(name, job_id=job_id, treatment=treatment, schema="result", is_task=False),
            features)

    tmp_df = pd.DataFrame(data=[row_features])
    REF_MODEL_KEY = f"{TREATMENTS_MODEL_REFS[treatment.upper()]}_MODEL"
    if bool(app.config.get(REF_MODEL_KEY)):
        x, _ = df_to_xy(
            tmp_df, select_columns=app.config[MODEL_INFOS_KEY]["top_columns"])
    else:
        x = None
    app.logger.debug("get_features - done")
    return x, row_features
def get_treaments_infos(con, treatments):
    """Return per-treatment completion counts for survey/resp/prop tables.

    Missing tables yield None for the corresponding entry.
    """
    all_infos = {}
    for treatment in treatments:
        infos = {}
        # survey: per-job completion count plus how many were dropped
        table_survey = f"result__{treatment}_survey"
        if table_exists(con, table_survey):
            sql_completed_surveys = f"""select r.job_id, count(*) count, (select count(*) from {table_survey} where job_id==r.job_id and completion_code=='dropped') dropped FROM {table_survey} r GROUP BY job_id; """
            rows = con.execute(sql_completed_surveys).fetchall()
            infos["survey"] = [{
                'job_id': row[0],
                'count': row[1],
                "dropped": row[2]
            } for row in rows]
        else:
            infos["survey"] = None
        # resp/prop: plain per-job row counts share the same query shape
        for part in ("resp", "prop"):
            table_part = f"result__{treatment}_{part}"
            if table_exists(con, table_part):
                sql_counts = f""" SELECT job_id, count(*) FROM {table_part} GROUP BY job_id """
                infos[part] = dict(con.execute(sql_counts).fetchall())
            else:
                infos[part] = None
        all_infos[treatment] = infos
    return all_infos
def handle_index_feedback(treatment, base_treatment):
    """Route an arriving worker to the proposer or responder index page.

    A returning worker (known previous code) keeps the role embedded in
    that code; otherwise the role comes from check_is_proposer_next.
    """
    job_id = request.args.get("job_id", "na")
    worker_id = request.args.get("worker_id", "na")
    try:
        max_judgments = int(request.args.get("max_judgments", "0"))
    except ValueError:
        max_judgments = None
    previous_worker_code = get_previous_worker_code(job_id, worker_id, base_treatment)
    app.logger.debug(f"handle_index: job_id: {job_id}, worker_id: {worker_id}")
    is_proposer = check_is_proposer_next(
        job_id, worker_id, treatment, max_judgments=max_judgments)
    table_all = get_table(BASE, "all", schema=None)
    con = get_db()
    if table_exists(con, table_all):
        with con:
            res = con.execute(f"SELECT * from {table_all} WHERE worker_id=?",
                              (worker_id, )).fetchone()
        # NOTE(review): `res` is currently unused — a duplicate-participation
        # rejection was disabled here.
    if previous_worker_code is None:
        endpoint = f"{treatment}.prop.index" if is_proposer else f"{treatment}.resp.index"
    elif prop_BASE in previous_worker_code:
        endpoint = f"{treatment}.prop.index"
    else:
        endpoint = f"{treatment}.resp.index"
    return redirect(url_for(endpoint, **request.args))
def get_row_ignore_job(con, job_id, worker_id, treatment):
    """ Get a row and change it's state to Judging with a last modified time

    :param con: sqlite3|sqlalchemy connection
    :param job_id: job id
    :param worker_id: worker's id
    :param treatment:
    :returns: (dict|None) the claimed row, or None when none is available
    """
    app.logger.debug("get_row")
    table = get_table(base=BASE, job_id=job_id, schema="data", treatment=treatment)
    if not table_exists(con, table):
        app.logger.warning(f"table: {table} does not exist")
        return None

    dep_change_time = time.time() - JUDGING_TIMEOUT_SEC
    # 1) the worker may already hold a fresh 'judging' row: hand it back
    free_rowid = con.execute(
        f'select {PK_KEY} from {table} where {STATUS_KEY}==? and {WORKER_KEY}==? and {LAST_MODIFIED_KEY} > ?',
        (RowState.JUDGING, worker_id, dep_change_time)).fetchone()
    if not free_rowid:
        # 2) a row that remained too long in the 'judging' state may be re-assigned
        free_rowid = con.execute(
            f'select {PK_KEY} from {table} where {STATUS_KEY}==? and {LAST_MODIFIED_KEY} < ?',
            (RowState.JUDGING, dep_change_time)).fetchone()
    if not free_rowid:
        # 3) otherwise take an untouched judgeable row
        free_rowid = con.execute(
            f'select {PK_KEY} from {table} where {STATUS_KEY}==?',
            (RowState.JUDGEABLE, )).fetchone()
    if not free_rowid:
        app.logger.warning(
            f"no row available! job_id: {job_id}, worker_id: {worker_id}")
        return None

    # Claim the row: fetch it, then mark it as being judged by this worker.
    # (The fetch+update sequence was previously duplicated in two branches.)
    rowid = free_rowid[PK_KEY]
    res = dict(
        con.execute(f'select {PK_KEY}, * from {table} where {PK_KEY}=?',
                    (rowid, )).fetchone())
    with con:
        update(
            f'UPDATE {table} set {LAST_MODIFIED_KEY}=?, {STATUS_KEY}=?, {WORKER_KEY}=? where {PK_KEY}=?',
            (time.time(), RowState.JUDGING, worker_id, rowid),
            con=con)
    return res
def create_resp_data_auto_prop_table(treatment, ref, use_ai_offer=None):
    """Seed the proposer data table from a reference job's exported results.

    Does nothing when the table already exists. When `use_ai_offer` is
    truthy, the AI's offer is copied into every offer column.
    """
    app.logger.debug(f"create_resp_data_auto_prop_table - {ref}, {treatment}")
    con = get_db()
    table = get_table(BASE, None, "data", treatment=treatment)
    assert len(ref) == 4, "expected references of the form <txyz>"
    required_columns = """ai_calls_acceptance_probability,ai_calls_best_offer_probability,ai_calls_count_repeated,ai_calls_offers,ai_calls_pauses,ai_nb_calls,ai_offer,feedback_accuracy,feedback_explanation,feedback_understanding,job_id,model_type,offer,offer_dss,offer_final,prop_time_spent,prop_worker_id,timestamp,worker_id""".split(
        ",")
    columns_to_clear = """ai_calls_acceptance_probability,ai_calls_best_offer_probability,ai_calls_count_repeated,ai_calls_offers,ai_calls_pauses,ai_nb_calls,feedback_accuracy,feedback_explanation,feedback_understanding,job_id,prop_time_spent,timestamp,worker_id""".split(
        ",")
    if table_exists(con, table):
        return
    csv_path = os.path.join(CODE_DIR, 'data', ref, 'export',
                            f'result__{ref}_prop.csv')
    df = pd.read_csv(csv_path)
    # guarantee the full schema, then restrict to it in canonical order
    for col in required_columns:
        if col not in df.columns:
            df[col] = None
    df = df[[col for col in required_columns if col in df.columns]]
    if use_ai_offer:
        for offer_col in ("offer_final", "offer", "offer_dss"):
            df[offer_col] = df["ai_offer"]
    # wipe reference-job bookkeeping before re-tagging the rows
    df[columns_to_clear] = None
    df["job_id"] = f"REFAUTO{ref.upper()}"
    df[STATUS_KEY] = RowState.JUDGEABLE
    df[WORKER_KEY] = None
    df["updated"] = 0
    with con:
        df.to_sql(table, con, index=False)
    app.logger.debug("create_table_data: table created")
def close_row(con, job_id, row_id, treatment):
    """Transition a row from JUDGING to JUDGED, stamping the modified time.

    The extra STATUS_KEY predicate makes the close a no-op unless the row
    is actually in the JUDGING state.
    """
    app.logger.debug("close_row")
    table = get_table(BASE, job_id=job_id, schema="data", treatment=treatment)
    if table_exists(con, table):
        with con:
            update(
                f'UPDATE {table} set {LAST_MODIFIED_KEY}=?, {STATUS_KEY}=? where {PK_KEY}=? and {STATUS_KEY}=?',
                (time.time(), RowState.JUDGED, row_id, RowState.JUDGING),
                con=con)
    else:
        app.logger.warning(f"table missing: <{table}>")
    app.logger.debug("close_row - done")
def update_job(con, job_id, job_config, table="jobs"):
    """Insert or update a job's configuration row.

    :param con: DB connection
    :param job_id: the job whose config is written
    :param job_config: mapping with api_key/base_code/expected_judgments/payment_max_cents
    :param table: target table name

    BUG-FIX: the UPDATE statement had no WHERE clause and would have
    rewritten every job's config; the existence check also queried the
    hard-coded `jobs` table instead of the `table` parameter.
    """
    if not table_exists(con, table):
        insert(pd.DataFrame(data=[job_config]), table=table, con=con)
        return
    with con:
        check = con.execute(f"SELECT job_id from {table} where job_id==?",
                            (job_id, )).fetchone()
        if check:
            update(
                f"UPDATE {table} SET api_key=?, base_code=?, expected_judgments=?, payment_max_cents=? WHERE job_id=?",
                args=(job_config['api_key'], job_config['base_code'],
                      job_config['expected_judgments'],
                      job_config['payment_max_cents'], job_id),
                con=con)
        else:
            insert(pd.DataFrame(data=[job_config]), table=table, con=con)
def get_job_config(con, job_id, table="jobs"):
    """ Return a job config or the default configuration if the job wasn't found

    :param con: (Connection)
    :param job_id:
    :param table:
    """
    row = None
    if table_exists(con, table):
        with con:
            row = con.execute(f"SELECT * from {table} WHERE job_id==?",
                              (job_id, )).fetchone()
    if row is None:
        # unknown job: fall back to a default config carrying only the id
        return JobConfig(job_id, api_key='')
    return JobConfig(**row)
def create_prop_data_table(treatment, ref):
    """Create the proposer data table from a reference job's export and
    clone the matching responder result table.

    Prefers the exported data-table (with all model features); falls back
    to replaying the responder result rows when that export is missing.

    BUG-FIX: after reading the responder CSV in the success path, the
    job_id was re-assigned on `df` (already written out) instead of
    `df_resp`, so the cloned resp table kept its original job_ids.
    Also: `logger.warn` is deprecated in favour of `logger.warning`.
    """
    app.logger.debug(f"create_table_data - {ref}, {treatment}")
    con = get_db()
    table_data = get_table(BASE, None, "data", treatment=treatment)
    table_resp = get_table("resp", None, "result", treatment=treatment)
    assert len(ref) == 4, "expected references of the form <txyz>"
    job_id = f"REF{ref.upper()}"
    if table_exists(con, table_data):
        return
    df_resp = None
    try:
        # normally, we expect an exported data-table to be available with all required features
        df = pd.read_csv(
            os.path.join(CODE_DIR, 'data', ref, 'export',
                         f'data__{ref}_prop.csv'))
        df[STATUS_KEY] = RowState.JUDGEABLE
        df[WORKER_KEY] = None
        df["updated"] = 0
        df["job_id"] = job_id
        with con:
            df.to_sql(table_data, con, index=False)
        app.logger.debug("create_table_data: table created")
        df_resp = pd.read_csv(
            os.path.join(CODE_DIR, 'data', ref, 'export',
                         f'result__{ref}_resp.csv'))
        df_resp["job_id"] = job_id
    except Exception as err:
        app.logger.warning(f"silenced-error: {err}")
        # otherwise, we work with the resp. table, which means the model does/should not expect any features.
        df = pd.read_csv(
            os.path.join(CODE_DIR, 'data', ref, 'export',
                         f'result__{ref}_resp.csv'))
        df["job_id"] = job_id
        for idx in range(len(df)):
            resp_row = dict(**df.iloc[idx])
            insert_row(job_id, resp_row, treatment)
        app.logger.debug("create_table_data: table created")
        df_resp = df
    df_resp.to_sql(table_resp, con, index=False, if_exists='replace')
    app.logger.debug("resp-table-cloned: table created")
def increase_worker_bonus(job_id, worker_id, bonus_cents, con=None):
    """Add `bonus_cents` to the worker's pending bonus, creating the
    payment row on first sight.

    :param job_id:
    :param worker_id:
    :param bonus_cents: (int)
    :param con:
    """
    app.logger.debug(
        f"increase_worker_bonus - job_id: {job_id}, worker_id: {worker_id}, bonus_cents: {bonus_cents}"
    )
    if con is None:
        con = get_db("DB")
    table = _get_payment_table(job_id)
    existing = None
    if table_exists(con, table):
        with con:
            existing = con.execute(
                f'select *, rowid from {table} WHERE job_id==? and worker_id==?',
                (job_id, worker_id)).fetchone()
            if existing:
                # accumulate onto the already-stored amounts
                update(
                    f"UPDATE {table} SET bonus_cents=?, paid_bonus_cents=? WHERE rowid=?",
                    (existing["bonus_cents"] + bonus_cents,
                     existing["paid_bonus_cents"], existing["rowid"]),
                    con=con)
    if not existing:
        app.logger.debug(
            f"increase_worker_bonus: {job_id}, {worker_id}, {bonus_cents}")
        fresh_row = {
            'job_id': job_id,
            'worker_id': worker_id,
            'timestamp': str(datetime.datetime.now()),
            'bonus_cents': bonus_cents,
            'paid_bonus_cents': 0
        }
        insert(pd.DataFrame(data=[fresh_row]), table, con,
               unique_fields=["worker_id"])
    con.commit()
def _get_worker_bonus_row(job_id, worker_id, con=None):
    """Fetch the worker's payment row (including rowid) as a dict.

    :param job_id:
    :param worker_id:
    :param con:
    :returns: (dict|None) None when the table or the row is missing
    """
    app.logger.debug(
        f"_get_worker_bonus_row: job_id: {job_id}, worker_id: {worker_id}")
    if con is None:
        con = get_db("DB")
    table = _get_payment_table(job_id)
    if table_exists(con, table):
        with con:
            row = con.execute(
                f'select *, rowid from {table} WHERE job_id==? and worker_id==?',
                (job_id, worker_id)).fetchone()
        if row:
            return dict(row)
        app.logger.error(
            f"_get_worker_bonus_row: worker not found! job_id: {job_id}, worker_id: {worker_id}"
        )
    return None
def check_is_proposer_next(job_id, worker_id, treatment, max_judgments=None, resp_only=None, prop_only=None):
    """Decide the role of the next arriving worker: proposer, responder, or
    waiting.

    The decision is based on how many responders/proposers have already
    completed and how many proposer rows are still claimable.

    :param job_id:
    :param worker_id:
    :param treatment:
    :param max_judgments: (int|None) overall judgment cap; None or 0 falls
        back to the job config's `expected_judgments`
    :param resp_only: restrict this job to responder tasks
    :param prop_only: restrict this job to proposer tasks
    :returns: one of NEXT_IS_PROPOSER / NEXT_IS_RESPONDER / NEXT_IS_WAITING
    """
    app.logger.debug("check_is_proposer_next")
    resp_table = get_table(resp_BASE, job_id=job_id, schema="result", treatment=treatment)
    prop_table = get_table(prop_BASE, job_id=job_id, schema="result", treatment=treatment)
    prop_table_data = get_table(prop_BASE, job_id=job_id, schema="data", treatment=treatment)
    job_config = get_job_config(get_db("DB"), job_id)
    con = get_db("DATA")
    nb_resp = 0
    nb_prop = 0
    nb_prop_open = 0
    # completed responder submissions for this job
    if table_exists(con, resp_table):
        with con:
            tmp = con.execute(
                f"SELECT COUNT(*) as count from {resp_table} where job_id=?",
                (job_id, )).fetchone()
            if tmp:
                nb_resp = tmp["count"]
    # proposer rows still claimable: judgeable, timed-out 'judging', or
    # already held by this very worker (so a returning worker keeps a row)
    if table_exists(con, prop_table_data):
        with con:
            judging_timeout = time.time() - JUDGING_TIMEOUT_SEC
            tmp = con.execute(
                f"SELECT COUNT(*) as count from {prop_table_data} where (job_id=? OR job_id like 'REF%') and ({STATUS_KEY}=? OR ({STATUS_KEY}=? and {LAST_MODIFIED_KEY}<?) OR ({WORKER_KEY}=?))",
                (job_id, RowState.JUDGEABLE, RowState.JUDGING, judging_timeout,
                 worker_id)).fetchone()
            if tmp:
                nb_prop_open = tmp["count"]
    # completed proposer submissions (reference jobs included)
    if table_exists(con, prop_table):
        with con:
            tmp = con.execute(
                f"SELECT COUNT(*) as count from {prop_table} where (job_id=? OR job_id like 'REF%')",
                (job_id, )).fetchone()
            if tmp:
                nb_prop = tmp["count"]
    #TODO: if nb_resp >= expected row/2, should only take props
    if max_judgments is None or max_judgments == 0:
        max_judgments = job_config["expected_judgments"]
    # by default the judgments are split evenly between the two roles
    max_resp = (max_judgments // 2)
    max_prop = (max_judgments // 2)
    if resp_only:
        max_resp = max_judgments
    elif prop_only:
        max_prop = max_judgments
    if max_judgments > 0:
        #if (max_judgments // 2) <= nb_resp and (max_judgments // 2) > nb_prop:
        if max_resp <= nb_resp and max_prop > nb_prop:
            # responder half is full: only proposer rows (if any) remain
            if nb_prop_open > 0:
                is_proposer = NEXT_IS_PROPOSER
            else:
                is_proposer = NEXT_IS_WAITING
        elif nb_prop_open > 0:
            is_proposer = NEXT_IS_PROPOSER
        else:
            if resp_only or prop_only:
                is_proposer = NEXT_IS_WAITING
            else:
                is_proposer = NEXT_IS_RESPONDER
        # single-role jobs override the generic decision above
        if resp_only:
            if max_judgments > nb_resp:
                is_proposer = NEXT_IS_RESPONDER
            else:
                is_proposer = NEXT_IS_WAITING
        elif prop_only:
            if max_judgments > nb_prop:
                is_proposer = NEXT_IS_PROPOSER
            else:
                is_proposer = NEXT_IS_WAITING
    elif nb_prop_open > 0:
        # no cap configured: serve open proposer rows first
        is_proposer = NEXT_IS_PROPOSER
    else:
        is_proposer = NEXT_IS_RESPONDER
    app.logger.debug(
        f"max_judgments: {max_judgments}, nb_prop: {nb_prop}, nb_resp: {nb_resp}, nb_prop_open: {nb_prop_open}, is_proposer: {is_proposer}"
    )
    return is_proposer
def _process_judgments(signal, payload, job_id, job_config, treatment, auto_finalize=False):
    """Process a crowd-platform webhook for incoming judgments.

    On "new_judgments": either finalize each worker's round (when
    `auto_finalize` is set, figuring out whether the worker was a
    responder or a proposer) or pay the per-judgment bonus directly.
    On "unit_complete": pay the bonus when `auto_finalize` is off.

    :param signal: (str)
    :param payload: (dict)
    :param job_id: (int|str)
    :param job_config: (JobConfig)
    :param auto_finalize (bool)
    """
    error_happened = False
    app.logger.debug(
        f"_process_judgments: {signal}, job_id: {job_id}, auto_finalize: {auto_finalize}"
    )
    with app.app_context():
        try:
            if signal == "new_judgments":
                judgments_count = payload['judgments_count']
                fig8 = FigureEight(job_id, job_config["api_key"])
                for idx in range(judgments_count):
                    if auto_finalize == True:
                        try:
                            con = get_db("RESULT")
                            worker_judgment = payload['results']['judgments'][
                                idx]
                            worker_id = worker_judgment["worker_id"]
                            app.logger.debug(
                                f"_process_judgments: {signal}, job_id: {job_id}, worker_id: {worker_id}"
                            )
                            # classify the worker by which result table
                            # holds a row for them
                            is_responder = False
                            is_proposer = False
                            table_resp = get_table(resp_BASE, job_id=job_id, schema="result", treatment=treatment)
                            table_prop = get_table(prop_BASE, job_id=job_id, schema="result", treatment=treatment)
                            with con:
                                if table_exists(con, table_resp):
                                    res = con.execute(
                                        f"SELECT * from {table_resp} WHERE job_id=? and worker_id=?",
                                        (job_id, worker_id)).fetchone()
                                    if res:
                                        is_responder = True
                                if not is_responder and table_exists(
                                        con, table_prop):
                                    res = con.execute(
                                        f"SELECT * from {table_prop} WHERE job_id=? and worker_id=?",
                                        (job_id, worker_id)).fetchone()
                                    if res:
                                        is_proposer = True
                            if is_responder:
                                finalize_resp(job_id=job_id, worker_id=worker_id, treatment=treatment)
                            elif is_proposer:
                                finalize_round(job_id=job_id, prop_worker_id=worker_id, treatment=treatment)
                            else:
                                app.logger.error(
                                    f"Error: unknown worker_id: {worker_id} for job_id: {job_id}"
                                )
                        except Exception as err:
                            # log only the first failure; keep processing
                            # the remaining judgments
                            if not error_happened:
                                app.log_exception(err)
                                error_happened = True
                    else:
                        worker_judgment = payload['results']['judgments'][idx]
                        worker_id = worker_judgment["worker_id"]
                        pay_worker_bonus(job_id, worker_id, fig8)
            elif signal == "unit_complete":
                judgments_count = payload['judgments_count']
                fig8 = FigureEight(job_id, job_config["api_key"])
                for idx in range(judgments_count):
                    if auto_finalize == False:
                        worker_judgment = payload['results']['judgments'][idx]
                        worker_id = worker_judgment["worker_id"]
                        # PAY_WORKER won't pay someone twice.
                        pay_worker_bonus(job_id, worker_id, fig8)
                #TODO: may process the whole unit here
                pass
        except Exception as err:
            app.log_exception(err)
    app.logger.debug(f"_process_judgments: {signal}, job_id: {job_id} - done")
def pay_worker_bonus(job_id, worker_id, api, con=None, assignment_id=None, send_notification=False):
    """Pay out a worker's pending bonus through the platform API (at most once).

    :param job_id:
    :param worker_id:
    :param api: platform client exposing contributor_pay / contributor_notify
    :param con:
    :param assignment_id:
    :param send_notification: notify the contributor after the payout
    :returns True if payment was done, False otherwise
    """
    app.logger.debug("pay_worker_bonus")
    if con is None:
        con = get_db("DB")
    table = get_table("jobs", job_id=job_id, schema=None, category="payment")
    job_config = get_job_config(con, job_id)
    should_pay = False
    bonus_cents = 0
    new_paid_bonus_cents = 0
    row = None
    if table_exists(con, table):
        with con:
            row = con.execute(
                f'select bonus_cents, paid_bonus_cents, rowid from {table} WHERE job_id==? and worker_id==?',
                (job_id, worker_id)).fetchone()
            if row:
                bonus_cents = row["bonus_cents"]
                should_pay = bonus_cents > 0
                # the pending bonus moves over into the paid total
                new_paid_bonus_cents = row["bonus_cents"] + row[
                    "paid_bonus_cents"]
            else:
                app.logger.warning(
                    f"pay_worker_bonus: worker not found! job_id: {job_id}, worker_id: {worker_id}"
                )
    if should_pay:
        #TODO: CHECK LATER FOR PAYMENT
        app.logger.info(f"SHOULD BE PAYING: {bonus_cents} cents")
        # BUG-FIX: the guard was inverted (`payment_max_cents > bonus_cents`),
        # which rejected every bonus *below* the cap and let over-cap
        # payments through. Refuse only when the bonus exceeds the cap.
        if job_config["payment_max_cents"] > 0 and bonus_cents > job_config[
                "payment_max_cents"]:
            app.logger.warning(
                f"Attempted payment over max allowed payment to worker {worker_id} on job {job_id}"
            )
            return False
        success = api.contributor_pay(worker_id, bonus_cents, assignment_id)
        if not success:
            app.logger.info(
                f"Impossible to pay: {bonus_cents} cents to contributor {worker_id}"
            )
            return False
        # zero the pending bonus and record the cumulated paid total
        with con:
            update(
                f"UPDATE {table} SET bonus_cents=?, paid_bonus_cents=? WHERE rowid=?",
                (0, new_paid_bonus_cents, row["rowid"]),
                con=con)
        if send_notification:
            api.contributor_notify(
                worker_id,
                f"Thank you for your participation. You just received your total bonus of {cents_repr(bonus_cents)} ^_^"
            )
        return True
    else:
        if send_notification:
            api.contributor_notify(
                worker_id,
                f"Thank you for your participation. Either you have already been paid or your bonus amount to 0.0 USD. ^_^"
            )
        return False