def generateAndVote(): db = DBConnection() generate = db.query_and_return_array("""SELECT * FROM assignments, slow_snapshots, study_videos WHERE assignments.assignmentid = slow_snapshots.assignmentid AND assignments.videoid = study_videos.videoid AND workerid NOT LIKE 'photographer' ORDER BY assignments.videoid, assignments.submit""") generate_by_video = groupByKey(generate, lambda x: x['videoid']) lags = [] for video in generate_by_video.keys(): print(video) generates = list(generate_by_video[video]) # get third dude third = generates[2] generate_lag = third['submit'] - third['slow_available_time'] #print generate_lag votes = db.query_and_return_array("""SELECT * FROM assignments, slow_votes, study_videos WHERE assignments.videoid = %s AND assignments.assignmentid = slow_votes.assignmentid AND assignments.videoid = study_videos.videoid ORDER BY assignments.submit""", (video, )) fifth = votes[4] vote_lag = fifth['submit'] - fifth['slow_voting_available_time'] #print vote_lag lags.append(float(generate_lag + vote_lag)) print(lags[-1]) print lags print("\n\n\nRESULTS") print("Median: %s" % numpy.median(lags)) print("Average: %s" % numpy.mean(lags)) print("Std. Dev: %s" % numpy.std(lags))
def uploadVideo(name, width, height): db = DBConnection() try: sql = """INSERT INTO videos (filename, width, height, creationtime, enabled) VALUES (%s, %s, %s, %s, FALSE)""" db.query_and_return_array(sql, (name, width, height, unixtime(datetime.now()))) except Exception, e: print("Error writing video to database:") print(e)
def portPhotographer(): db = DBConnection() snapshots = db.query_and_return_array( """SELECT slow_snapshots.pk, slow_snapshots.location, assignments.videoid FROM slow_snapshots, assignments, study_videos WHERE slow_snapshots.assignmentid = assignments.assignmentid AND workerid LIKE 'photographer' AND study_videos.videoid = assignments.videoid ORDER BY assignments.videoid""" ) for snapshot in snapshots: print(snapshot) db.query_and_return_array( """INSERT INTO algorithms (algorithm, videoid, location, detail) VALUES (%s, %s, %s, %s)""", (PHOTOGRAPHER, snapshot['videoid'], snapshot['location'], json.dumps(dict(snapshot_pk=snapshot['pk'])))) print("Done!")
def portGenerateAndVote(): """ports information into the algorithms table for the generate and vote condition""" db = DBConnection() sql = """SELECT vote_assignments.pk, study_videos.videoid, vote FROM study_videos LEFT JOIN (SELECT slow_votes.pk, slow_votes.assignmentid, videoid, vote FROM slow_votes, assignments WHERE slow_votes.assignmentid = assignments.assignmentid) AS vote_assignments ON vote_assignments.videoid = study_videos.videoid WHERE slow_voting_available = TRUE ORDER BY vote_assignments.pk""" votes = db.query_and_return_array(sql) # we need to take just the first five votes for each video grouped_votes = groupAssignmentsByKey(votes, lambda x: x['videoid']) videoids = sorted(grouped_votes.keys()) votes = [] for videoid in videoids: video_votes = sorted(list(grouped_votes[videoid]), key=lambda x: x['pk']) num_votes = 0 winner = None while winner is None: if len(video_votes) < num_votes: print("Not enough votes to decide this case: %s", video_votes) return num_votes += 1 (winner, counts) = checkForWinner(video_votes[:num_votes]) vote = dict() vote['videoid'] = videoid vote['slow_vote_pks'] = [ vote['pk'] for vote in video_votes[:num_votes] ] vote['count'] = counts vote['winner'] = (Decimal(str(winner)) / 100) - Decimal( '.01' ) # off by one frame error from jpeg filename to slider position votes.append(vote) print vote # num_voters = sorted([len(vote['slow_vote_pks']) for vote in votes]) for vote in votes: db.query_and_return_array( """INSERT INTO algorithms (algorithm, videoid, location, detail) VALUES (%s, %s, %s, %s)""", (GENERATE_AND_VOTE, vote['videoid'], vote['winner'], json.dumps(vote, cls=StudyDecimalEncoder))) print("Done")
def portTakeFirst(): db = DBConnection() sql = """SELECT slow_snapshots.pk, slow_snapshots.location, assignments.videoid FROM slow_snapshots, assignments WHERE slow_snapshots.pk IN (SELECT MIN(slow_snapshots.pk) FROM slow_snapshots, assignments, study_videos WHERE assignments.assignmentid = slow_snapshots.assignmentid AND assignments.videoid = study_videos.videoid AND workerid NOT LIKE 'photographer' GROUP BY study_videos.videoid) AND assignments.assignmentid = slow_snapshots.assignmentid ORDER BY assignments.videoid ASC """ snapshots = db.query_and_return_array(sql) for snapshot in snapshots: print(snapshot) db.query_and_return_array( """INSERT INTO algorithms (algorithm, videoid, location, detail) VALUES (%s, %s, %s, %s)""", (GENERATE_FIRST, snapshot['videoid'], snapshot['location'], json.dumps(dict(snapshot_pk=snapshot['pk'])))) print("Done!")
def setRandomCondition(worker_id): """ Chooses a random group to assign the worker to, and sets it in the database """ db = DBConnection() count = db.query_and_return_array( """SELECT COUNT(*) FROM workers""")[0]['COUNT(*)'] index = count % len(CONDITIONS) random_condition = CONDITIONS[index] db.query_and_return_array( """INSERT INTO workers (workerid, is_alert, is_reward, is_tetris, read_instructions) VALUES (%s, %s, %s, %s, FALSE) ON DUPLICATE KEY UPDATE is_alert=%s, is_reward=%s, is_tetris=%s, read_instructions=FALSE""", (worker_id, random_condition['is_alert'], random_condition['is_reward'], random_condition['is_tetris'], random_condition['is_alert'], random_condition['is_reward'], random_condition['is_tetris'])) return random_condition
def parseVideos(study_videos_only=True): """ Grabs each video from the DB and tells us information about its lag """ db = DBConnection() if study_videos_only: all_videos = db.query_and_return_array( """SELECT pk, filename FROM videos, study_videos WHERE videos.pk = study_videos.videoid""" ) else: all_videos = db.query_and_return_array( """SELECT pk, filename FROM videos""") printPhaseTimes(db, study_videos_only, all_videos) print('\n') printVideoTimes(db, study_videos_only)
def portContinuousRefinement(): """ports information into the algorithhms table for the continuous refinement condition""" db = DBConnection() sql = """SELECT * FROM pictures, study_videos, phase_lists WHERE pictures.videoid = study_videos.videoid AND pictures.phase_list = phase_lists.pk AND NOT is_historical GROUP BY pictures.videoid""" pictures = db.query_and_return_array(sql) if len(pictures) != NUM_VIDEOS: print( str(len(pictures)) + " videos instead of " + str(NUM_VIDEOS) + ". Stopping.") print([picture['videoid'] for picture in pictures]) return for picture in pictures: sql = """INSERT INTO algorithms (algorithm, videoid, location, detail) VALUES (%s, %s, %s, %s)""" detail = json.dumps(dict(phase_list=picture['phase_list'])) db.query_and_return_array(sql, (CONTINUOUS_REFINEMENT, picture['videoid'], picture['location'], detail)) print("done")
def getPhotos(filename): db = DBConnection() locations = db.query_and_return_array( """SELECT DISTINCT location FROM pictures, videos WHERE pictures.videoid = videos.pk AND videos.filename = %s""", (filename, )) # don't forget off-by-one: slider starts at 0, but first photo is photo1.jpg filenumbers = [ '%03d' % min(int(row['location'] * 100 + 1), 100) for row in locations ] return [ VIDEO_HOST_DIRECTORY + filename + filenumber + '.jpg' for filenumber in filenumbers ]
def getAssignments(experiments): """ Queries the database for all the assignments completed in this experiment, and populates the array with all relevant timestamps """ db = DBConnection() experimentString = ', '.join( [str(experiment) for experiment in experiments]) results = db.query_and_return_array( """SELECT * from assignments a, workers w, hits h WHERE experiment IN (""" + experimentString + """) AND a.workerid = w.workerid AND a.hitid = h.hitid """) assignments = [] """ For each assignment, get its completion information """ for row in results: assignment = Assignment() assignment.workerid = row['workerid'] assignment.assignmentid = row['assignmentid'] assignment.wait_bucket = row['waitbucket'] assignment.precision = row['precision'] assignment.recall = row['recall'] assignment.condition = condition.getConditionName( bool(row['is_alert']), bool(row['is_reward']), bool(row['is_tetris'])) if row['first'] is not None: assignment.answer_time = datetime.fromtimestamp(row['first']) # when did the worker accept that task? if row['accept'] is not None: assignment.accept_time = datetime.fromtimestamp(row['accept']) # when did the task or 'go' button appear to the user? if row['show'] is not None: assignment.show_time = datetime.fromtimestamp(row['show']) # when did the worker click the "go" button? if row['go'] is not None: assignment.go_time = datetime.fromtimestamp(row['go']) if row['submit'] is not None: assignment.submit_time = datetime.fromtimestamp(row['submit']) assignments.append(assignment) print("Total number of assignments: %s" % (len(assignments))) return assignments
def getCondition(worker_id): """ Sees if the worker has already been assigned a condition, and if not, assigns them """ db = DBConnection() rows = db.query_and_return_array( """SELECT is_alert, is_reward, is_tetris FROM workers WHERE workerid = %s """, (worker_id, )) if len(rows) == 0: # not in database yet result = setRandomCondition(worker_id) else: result = rows[0] is_alert = bool(result['is_alert']) is_reward = bool(result['is_reward']) is_tetris = bool(result['is_tetris']) return {'isAlert': is_alert, 'isReward': is_reward, 'isTetris': is_tetris}
def randomizeConditions(do_you_mean_it="NO"): if do_you_mean_it != "YES_I_MEAN_IT": print """ This will assign all workerids to a new random condition. Do not do this lightly!\n Call with randomizeConditions('YES_I_MEAN_IT') """ return 0 else: print("Assigning all workers to a new random condition") db = DBConnection() # get all workers, randomize them one by one result = db.query_and_return_array("""SELECT workerid FROM workers""") workers = [row['workerid'] for row in result] for worker in workers: setRandomCondition(worker)
def getSlowPhotos(videoid, limit): db = DBConnection() locations = db.query_and_return_array( """SELECT filename, location FROM slow_snapshots, assignments, videos WHERE slow_snapshots.assignmentid = assignments.assignmentid AND videos.pk = assignments.videoid AND assignments.videoid = %s ORDER BY slow_snapshots.pk ASC LIMIT %s""", (videoid, limit)) filename = locations[0]['filename'] # don't forget off-by-one: slider starts at 0, but first photo is photo1.jpg filenumbers = set([ '%03d' % min(int(row['location'] * 100 + 1), 100) for row in locations ]) # use set() so we don't get copies return [ VIDEO_HOST_DIRECTORY + filename + filenumber + '.jpg' for filenumber in filenumbers ]
def printCurrentlyActiveCount(): ping_floor = datetime.now() - timedelta(seconds=15) ping_types = ["ping-waiting", "ping-showing", "ping-working"] db = DBConnection() results = dict() for ping_type in ping_types: row = db.query_and_return_array( """SELECT COUNT(DISTINCT assignmentid) FROM logging WHERE event='%s' AND servertime >= %s""" % (ping_type, unixtime(ping_floor)))[0] results[ping_type] = row['COUNT(DISTINCT assignmentid)'] print(ping_type + ": unique assignmentIds pings in last 15 seconds: " + str(results[ping_type])) return results
def answer_reviewer(answer): result = None approve_response = (True, APPROVE_REASON) reject_response = (False, REJECT_REASON) db = DBConnection() try: whacked = [int(s) for s in answer['m'].split('|')] assignmentid = answer['assignmentId'] actualPosition = int(db.query_and_return_array("""SELECT moleposition FROM assignments, moles WHERE assignmentid = %s AND moles.pk = assignments.moleid""", (assignmentid, ))[0]['moleposition']) print("Whack! Position %s, goal position %s" % (whacked, actualPosition)) if (actualPosition in whacked): result = approve_response else: result = reject_response except: logging.exception("error reviewing answer: " + str(answer)) return result
def agreedWithPhases(phases, locations, min_phases): count_agreed = 0 db = DBConnection() print locations for i in range(len(phases) - 1): # get the location we were at when the phase ended try: last_location = Decimal(str(locations[i])) except IndexError: # we don't have enough data points, maybe they came in midway through the decision # we'll just use one they do have last_location = Decimal(str(locations[0])) # get the bounds that ended up being decided on (e.g., the next phase) bounds = db.query_and_return_array( """SELECT min, max FROM phases WHERE phase = %s""", (phases[i + 1], ))[0] if bounds['min'] <= last_location and last_location <= bounds['max']: count_agreed += 1 return count_agreed >= min_phases
def printSummary(assignments, assignments_including_incomplete, condition=None): # TODO?: WARNING: not removing first worker attempt to smooth print("N = %d, %d unique workers" % (len(assignments), len(set([assignment.workerid for assignment in assignments])))) if len(assignments) == 0: return table = [["metric", "10%", "25%", "50%", "75%", "90%", "mean", "std. dev"]] accept_show = [ click.showDeltaAccept() for click in assignments if click.showDeltaAccept() is not None ] table.append([ "accept-show", str(stats.scoreatpercentile(accept_show, 10)), str(stats.scoreatpercentile(accept_show, 25)), str(stats.scoreatpercentile(accept_show, 50)), str(stats.scoreatpercentile(accept_show, 75)), str(stats.scoreatpercentile(accept_show, 90)), str(numpy.mean(accept_show)), str(numpy.std(accept_show)) ]) go_show = [ click.goDeltaShow() for click in assignments_including_incomplete ] for i in range(len(go_show)): if go_show[i] is None: go_show[i] = sys.maxint table.append([ "show-go", str(stats.scoreatpercentile(go_show, 10)), str(stats.scoreatpercentile(go_show, 25)), str(stats.scoreatpercentile(go_show, 50)), str(stats.scoreatpercentile(go_show, 75)), str(stats.scoreatpercentile(go_show, 90)), str(numpy.mean(go_show)), str(numpy.std(go_show)) ]) go_answer = [ click.answerDeltaGo() for click in assignments if click.answerDeltaGo() is not None ] table.append([ "go-answer", str(stats.scoreatpercentile(go_answer, 10)), str(stats.scoreatpercentile(go_answer, 25)), str(stats.scoreatpercentile(go_answer, 50)), str(stats.scoreatpercentile(go_answer, 75)), str(stats.scoreatpercentile(go_answer, 90)), str(numpy.mean(go_answer)), str(numpy.std(go_answer)) ]) pprint_table(sys.stdout, table) print("UNDER TWO: %s" % stats.percentileofscore(go_show, 2.0)) print("UNDER THREE: %s" % stats.percentileofscore(go_show, 3.0)) # Correlation between wait-show and show-go #(r, p_val) = stats.pearsonr(accept_show, go_show) #print("Correlation between accept-show and show-go: %f, p<%f" % (r, p_val)) #(r_answer, p_val_answer) = stats.pearsonr(go_show, go_answer) #print("Correlation between show-go and go-answer: %f, p<%f" % (r_answer, p_val_answer)) # bounce rate mortality = 1 - float( len(assignments)) / len(assignments_including_incomplete) print("Mortality Rate: " + str(mortality)) # preview to accept ratio: we can look for a given HIT id # (which will only have one assignment) how many unique IPs previewed it # it's possible that lots of people previewed and wanted, but only one # was fast enough to grab it? if condition is not None: db = DBConnection() if condition == 'tetris': experiments_string = ','.join([str(ex) for ex in EXPERIMENTS]) result = db.query_and_return_array( """ SELECT COUNT(DISTINCT assignmentid) FROM logging WHERE event = 'tetris_row_clear' AND experiment IN (%s) """, (experiments_string, ))[0] num_playing = result['COUNT(DISTINCT assignmentid)'] print( str(num_playing) + " assignments out of " + str(len(assignments)) + " (" + str(float(num_playing) / len(assignments) * 100) + "%) cleared a row in Tetris ")
def deleteContinuousRefinement(): db = DBConnection() db.query_and_return_array("DELETE FROM algorithms WHERE algorithm = %s", (CONTINUOUS_REFINEMENT)) print("Deleted")