def printTestDuration(path):
    """Print per-file total test durations (in minutes) plus mean and stdev.

    Reads every ".csv" user-response file under *path*.  For each response
    row, the timestamp is taken from the second-to-last comma-separated
    field of column 2 and parsed as "MM:SS"; malformed timestamps are
    skipped.  Per-file totals are printed as they are computed, followed by
    the mean and (population) standard deviation across all files.

    BUGFIX: the original raised ZeroDivisionError when the folder contained
    no csv files; we now report that and return early instead.
    """
    allTime = []
    for fn in utils.findFiles(path, filterExt=".csv"):
        timeStrList = [rowTuple[2].split(",")[-2]
                       for rowTuple in
                       user_response.loadUserResponse(join(path, fn))]

        timeList = []
        for timeStamp in timeStrList:
            try:
                minutes, seconds = timeStamp.split(':')
            except ValueError:
                # Not an "MM:SS" timestamp -- skip this row
                continue
            totalSeconds = int(minutes) * 60 + float(seconds)
            timeList.append(totalSeconds / 60.0)

        totalTime = sum(timeList)
        allTime.append(totalTime)
        print("%s, %f" % (fn, totalTime))

    # Guard: no csv files found -> nothing to aggregate
    if len(allTime) == 0:
        print("No csv files found in folder: %s" % path)
        return

    meanTime = sum(allTime) / len(allTime)
    print("Mean: %f" % meanTime)

    # Population standard deviation (divides by N, not N-1)
    timeDeviationList = [(time - meanTime) ** 2 for time in allTime]
    stDev = math.sqrt(sum(timeDeviationList) / len(allTime))
    print("Standard Deviation: %f" % stDev)
def postProcessResults(testName, sequenceFN, removeDuplicatesFlag,
                       removeItemList=None):
    """Validate and transpose all user responses for one test sequence.

    Locates the output folder for *testName*/*sequenceFN* under the project
    root, optionally strips duplicate responses, verifies that every
    subject's response file has the same length and the same page sequence,
    then dispatches each page type (survey / rpt pages / media_choice) to
    its transposing routine.

    Calls exit(0) when responses differ in length or the folder is empty.
    removeItemList is forwarded to extractFromTest unmodified.
    """
    rootPath = join(constants.rootDir, "tests", testName)
    txtPath = join(rootPath, "txt")
    tmpSequence = sequence.TestSequence(None, join(rootPath, sequenceFN))
    fullPath = join(rootPath, "output", tmpSequence.sequenceTitle)
    pathToData = fullPath

    # Either remove duplicates in place, or copy everything unchanged into
    # a sibling folder, so downstream code always reads from a subfolder.
    if removeDuplicatesFlag is True:
        removeDuplicates(pathToData, True)
        pathToData = join(pathToData, "duplicates_removed")
    else:
        newPathToData = join(pathToData, "duplicates_not_removed")
        utils.makeDir(newPathToData)
        for fn in utils.findFiles(pathToData, filterExt=".csv"):
            shutil.copy(join(pathToData, fn), join(newPathToData, fn))
        pathToData = newPathToData
    outputPath = pathToData + "_results"

    userResponseList = []
    fnList = utils.findFiles(pathToData, filterExt=".csv")
    for fn in fnList:
        # NOTE: rebinds the earlier 'fullPath' (folder path) to a file path
        fullPath = join(pathToData, fn)
        userResponseList.append(user_response.loadUserResponse(fullPath))

    # Don't continue if files are of different lengths
    testLen = len(userResponseList[0])
    if not all([len(response) == testLen
                for response in userResponseList]):
        print("ERROR: Not all responses in folder %s are the same length"
              % pathToData)
        # Group filenames by response length for the diagnostic report
        countDict = {}
        for fn, response in utils.safeZip([fnList, userResponseList], True):
            countDict.setdefault(len(response), [])
            countDict[len(response)].append(fn)
        keyList = list(countDict.keys())
        keyList.sort()
        for numLines in keyList:
            print("%d lines - %s" % (numLines, str(countDict[numLines])))
        exit(0)

    # Don't continue if pages are different
    # Partition subjects into groups that saw an identical page sequence;
    # sameList holds one representative page list per group and
    # fnListOfLists the filenames belonging to each group.
    pageNameList = [[(pageTuple[0], pageTuple[1]) for pageTuple in response]
                    for response in userResponseList]
    sameList = []
    fnListOfLists = []
    for fn, pageList in utils.safeZip([fnList, pageNameList], True):
        i = 0
        while True:
            if len(sameList) == i:
                # New, previously unseen page sequence -- start a new group
                # (the next loop pass matches it and records fn)
                sameList.append(pageList)
                fnListOfLists.append([])
            else:
                if sameList[i] == pageList:
                    fnListOfLists[i].append(fn)
                    break
                else:
                    i += 1
    if len(sameList) == 0:
        print("ERROR: There don't appear to be any test data in folder %s"
              % pathToData)
        exit(0)
    if len(sameList) != 1:
        # NOTE(review): unlike the two errors above, this path does NOT
        # exit -- processing continues with pageNameList[0]; presumably a
        # deliberate warn-and-continue, but confirm.
        print("ERROR: User data doesn't agree. Filenames printed on "
              "different lines differ in their pages.")
        for subFNList in fnListOfLists:
            print(", ".join(subFNList))

    # Extract the different tests users completed
    uniquePageList = []
    for pageTuple in pageNameList[0]:
        pageName = pageTuple[0]
        if pageName not in uniquePageList:
            uniquePageList.append(pageName)
    extractFromTest(pathToData, uniquePageList, removeItemList)

    # Transpose the surveys
    if "survey" in uniquePageList:
        surveyNameList = []
        for pageName, stimuliArgList in pageNameList[0]:
            if pageName == "survey":
                surveyName = stimuliArgList[0]
                surveyNameList.append(join(rootPath, surveyName + '.txt'))
        transpose_survey.transposeSurvey(join(pathToData, "survey"),
                                         surveyNameList, outputPath)

    # Transpose the rpt pages
    prominencePageList = ["prominence", "boundary",
                          "boundary_and_prominence", "syllable_marking"]
    for pageName in prominencePageList:
        if pageName in uniquePageList:
            transpose_rpt.transposeRPT(join(pathToData, pageName),
                                       txtPath, pageName, outputPath)

    choicePageList = ["media_choice", ]
    for pageName in choicePageList:
        if pageName in uniquePageList:
            transpose_choice.transposeChoice(join(pathToData, pageName),
                                             pageName, outputPath)
def transposeChoice(path, pageName, outputPath):
    """Transpose media-choice responses: one output row per stimulus.

    Reads one csv per subject from *path*, reduces each response to the
    index of the selected option, and writes "<pageName>.csv" to
    *outputPath* (stimuli as rows, subjects as columns).  Also writes a
    blank answer template, unless one already exists.
    """
    utils.makeDir(outputPath)

    # Load one response file per subject
    fnList = utils.findFiles(path, filterExt=".csv")
    loadedResponses = [user_response.loadUserResponse(join(path, fn))
                       for fn in fnList]

    # Sort response if sequence order information is available
    loadedResponses, stimuliListsOfLists, orderListOfLists = \
        transpose_utils.parseResponse(loadedResponses)

    # Reduce each response row to the index of the option marked '1'
    choiceIndexLists = []
    for subjectRows in loadedResponses:
        choiceIndexLists.append([str(row[3].split(',').index('1'))
                                 for row in subjectRows])

    # Every subject must have been shown the same stimuli
    firstHeader = stimuliListsOfLists[0]
    assert(all([firstHeader == header for header in stimuliListsOfLists]))

    # Transpose: rows become stimuli, columns become subjects
    transposedResponses = list(utils.safeZip(choiceIndexLists, True))
    transposedOrders = []
    if len(orderListOfLists) > 0:
        transposedOrders = list(utils.safeZip(orderListOfLists, True))

    # Prefix every stimulus row with a zero-padded unique id (s0, s1, ...)
    oom = utils.orderOfMagnitude(len(firstHeader))
    stimID = "s%%0%dd" % (oom + 1)
    stimuliList = ["%s,%s" % (stimID % i, row)
                   for i, row in enumerate(firstHeader)]

    addSequenceInfo = len(transposedOrders) > 0
    if addSequenceInfo:
        # Append each stimulus' presentation-order info to its row
        zipped = utils.safeZip([transposedResponses, transposedOrders], True)
        transposedResponses = [list(row) + list(orderInfo)
                               for row, orderInfo in zipped]

    # Aggregate the stimuli and the responses in rows
    transposedResponses = [list(row) for row in transposedResponses]
    outputList = [[header, ] + list(row)
                  for header, row in utils.safeZip([stimuliList,
                                                    transposedResponses],
                                                   True)]

    # Column headings: first row unanonymized user names, second anonymized
    numArgs = stimuliList[0].count(",")
    rowOne, rowTwo = _buildHeader(fnList, numArgs, pageName, addSequenceInfo)
    outputList = [rowOne, rowTwo, ] + outputList

    outputTxt = u"\n".join([",".join(row) for row in outputList])
    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

    # Emit a template users can fill in to auto score the results
    name = pageName + "_answer_template.csv"
    answersFN = join(outputPath, name)
    if os.path.exists(answersFN):
        print("Response template '%s' already exists. Not overwriting."
              % name)
    else:
        outputTxt = u"\n".join(stimuliList)
        with io.open(answersFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
def transposeRPT(path, txtPath, pageName, outputPath):
    '''
    Transposes RPT data

    Input files: one file per subject
    Output files: one file per stimuli

    Loads per-subject responses from *path*, the stimulus word lists from
    *txtPath* (optionally split into syllables by a demarcator for
    syllable_marking pages), transposes them via _transposeRPT, and writes
    an aggregated "<pageName>.csv" to *outputPath*.
    '''
    utils.makeDir(outputPath)

    # Load response data
    responseDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        a = user_response.loadUserResponse(join(path, fn))
        responseDataList.append(a)

    # Load the demarcator, if there is one
    # and load the order info if present
    demarcator = None
    pageName, pageArgs, _, _ = responseDataList[0][0]
    if pageName == "syllable_marking":
        # The demarcator can either be an arg or a keyword arg.
        # Either way, it should be the last item in the list
        demarcator = pageArgs[-1]
        if "syllableDemarcator" in demarcator:
            demarcator = demarcator.split("=")[1]

    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, _, orderListOfLists = parsedTuple

    orderList = []
    if len(orderListOfLists) > 0:
        orderList = [",".join(row)
                     for row in utils.safeZip(orderListOfLists, True)]

    # Load Words
    txtDict = {}
    for fn in utils.findFiles(txtPath, filterExt=".txt"):
        name = os.path.splitext(fn)[0]
        txtList = loader.loadTxtFile(join(txtPath, fn))
        txtList = [tmpTxt.replace(" ", ",") for tmpTxt in txtList]

        # Remove HTML tags
        txtList = [word for word in txtList if "<" not in word]

        txt = ",".join(txtList)
        if demarcator is None:
            txtDict[name] = [word for word in txt.split(",") if word != ""]
        else:
            txtDict[name] = [syllable for word in txt.split(",")
                             if word != ""
                             for syllable in word.split(demarcator)]

    returnDict, idKeyList = _transposeRPT(responseDataList)

    doUserSeqHeader = len(orderListOfLists) > 0
    headerRow, anonHeaderRow = _buildHeader(fnList, pageName,
                                            doUserSeqHeader, idKeyList[0])

    # Format the output rpt scores
    aggrOutputList = [headerRow, anonHeaderRow]
    for i in range(len(idKeyList)):
        stimulusID = idKeyList[i]
        wordList = txtDict[stimulusID.split(",")[2]]
        stimulusIDList = [stimulusID for _ in wordList]
        aspectSumList = [stimulusIDList, wordList, ]
        aspectList = []

        # BUGFIX: initialise the score lists so a KeyError (swallowed
        # below) no longer leads to a NameError when the lists are read
        # in the extend() calls or the max() expression further down
        bScoreList, bSumList = [], []
        pScoreList, pSumList = [], []
        try:
            bScoreList, bSumList = _getScores(returnDict[stimulusID], B)
        except KeyError:
            pass
        try:
            pScoreList, pSumList = _getScores(returnDict[stimulusID], P)
        except KeyError:
            pass

        if pageName == "boundary":
            aspectSumList.extend([bSumList, ])
            aspectList.extend([bScoreList, ])
        elif pageName in ["prominence", "syllable_marking"]:
            aspectSumList.extend([pSumList, ])
            aspectList.extend([pScoreList, ])
        elif pageName == "boundary_and_prominence":
            aspectSumList.extend([bSumList, pSumList, ])
            aspectList.extend([bScoreList, pScoreList, ])

        # Extend header with sequence order information
        if doUserSeqHeader:
            orderStr = orderList[i]
            numAnnotators = range(max([len(bSumList), len(pSumList)]))
            tmpOrderList = [orderStr for _ in numAnnotators]
            aspectList.extend([tmpOrderList, ])

        dataList = aspectSumList + aspectList
        combinedList = [_unifyRow(row)
                        for row in utils.safeZip(dataList,
                                                 enforceLength=True)]
        aggrOutputList.extend([",".join(row) for row in combinedList])

    outputTxt = "\n".join(aggrOutputList)
    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)
def transposeRPT(path, txtPath, pageName, outputPath):
    '''
    Transposes RPT data

    Input files: one file per subject
    Output files: one file per stimuli

    Loads per-subject responses from *path*, the stimulus word lists from
    *txtPath* (optionally split into syllables by a demarcator for
    syllable_marking pages), transposes them via _transposeRPT, and writes
    an aggregated "<pageName>.csv" to *outputPath*.
    '''
    utils.makeDir(outputPath)

    # Load response data
    responseDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        a = user_response.loadUserResponse(join(path, fn))
        responseDataList.append(a)

    # Load the demarcator, if there is one
    # and load the order info if present
    demarcator = None
    pageName, pageArgs, _, _ = responseDataList[0][0]
    if pageName == "syllable_marking":
        # The demarcator can either be an arg or a keyword arg.
        # Either way, it should be the last item in the list
        demarcator = pageArgs[-1]
        if "syllableDemarcator" in demarcator:
            demarcator = demarcator.split("=")[1]

    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, _, orderListOfLists = parsedTuple

    orderList = []
    if len(orderListOfLists) > 0:
        orderList = [
            ",".join(row) for row in utils.safeZip(orderListOfLists, True)
        ]

    # Load Words
    txtDict = {}
    for fn in utils.findFiles(txtPath, filterExt=".txt"):
        name = os.path.splitext(fn)[0]
        txtList = loader.loadTxtFile(join(txtPath, fn))
        txtList = [tmpTxt.replace(" ", ",") for tmpTxt in txtList]

        # Remove HTML tags
        txtList = [word for word in txtList if "<" not in word]

        txt = ",".join(txtList)
        if demarcator is None:
            txtDict[name] = [word for word in txt.split(",") if word != ""]
        else:
            txtDict[name] = [
                syllable for word in txt.split(",") if word != ""
                for syllable in word.split(demarcator)
            ]

    returnDict, idKeyList = _transposeRPT(responseDataList)

    doUserSeqHeader = len(orderListOfLists) > 0
    headerRow, anonHeaderRow = _buildHeader(fnList, pageName,
                                            doUserSeqHeader, idKeyList[0])

    # Format the output rpt scores
    aggrOutputList = [headerRow, anonHeaderRow]
    for i in range(len(idKeyList)):
        stimulusID = idKeyList[i]
        wordList = txtDict[stimulusID.split(",")[2]]
        stimulusIDList = [stimulusID for _ in wordList]
        aspectSumList = [
            stimulusIDList,
            wordList,
        ]
        aspectList = []

        # BUGFIX: initialise the score lists so a KeyError (swallowed
        # below) no longer leads to a NameError when the lists are read
        # in the extend() calls or the max() expression further down
        bScoreList, bSumList = [], []
        pScoreList, pSumList = [], []
        try:
            bScoreList, bSumList = _getScores(returnDict[stimulusID], B)
        except KeyError:
            pass
        try:
            pScoreList, pSumList = _getScores(returnDict[stimulusID], P)
        except KeyError:
            pass

        if pageName == "boundary":
            aspectSumList.extend([
                bSumList,
            ])
            aspectList.extend([
                bScoreList,
            ])
        elif pageName in ["prominence", "syllable_marking"]:
            aspectSumList.extend([
                pSumList,
            ])
            aspectList.extend([
                pScoreList,
            ])
        elif pageName == "boundary_and_prominence":
            aspectSumList.extend([
                bSumList,
                pSumList,
            ])
            aspectList.extend([
                bScoreList,
                pScoreList,
            ])

        # Extend header with sequence order information
        if doUserSeqHeader:
            orderStr = orderList[i]
            numAnnotators = range(max([len(bSumList), len(pSumList)]))
            tmpOrderList = [orderStr for _ in numAnnotators]
            aspectList.extend([
                tmpOrderList,
            ])

        dataList = aspectSumList + aspectList
        combinedList = [
            _unifyRow(row)
            for row in utils.safeZip(dataList, enforceLength=True)
        ]
        aggrOutputList.extend([",".join(row) for row in combinedList])

    outputTxt = "\n".join(aggrOutputList)
    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)
def transposeChoice(path, pageName, outputPath):
    """Transpose media-choice responses: one output row per stimulus.

    Reads one csv per subject from *path*, reduces each response row to
    the index of the selected option, and writes "<pageName>.csv" to
    *outputPath* with stimuli as rows and subjects as columns.  Also
    writes a blank answer template, unless one already exists.
    """
    utils.makeDir(outputPath)

    # Load response data
    responseDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        a = user_response.loadUserResponse(join(path, fn))
        responseDataList.append(a)

    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, stimuliListsOfLists, orderListOfLists = parsedTuple

    # Convert response to single answer
    # (each row's 4th field is a comma-separated 0/1 mask; keep the
    # position of the '1' as the subject's choice)
    tmpUserResponse = []
    for userDataList in responseDataList:
        # Get user response
        userResponse = [
            str(responseTuple[3].split(',').index('1'))
            for responseTuple in userDataList
        ]
        tmpUserResponse.append(userResponse)

    responseDataList = tmpUserResponse

    # Verify that all responses have the same list of stimuli
    assert (all(
        [stimuliListsOfLists[0] == header for header in stimuliListsOfLists]))

    # Transpose data
    tResponseDataList = [row for row in utils.safeZip(responseDataList, True)]

    tOrderListOfLists = []
    if len(orderListOfLists) > 0:
        tOrderListOfLists = [
            row for row in utils.safeZip(orderListOfLists, True)
        ]

    # Add a unique id to each row
    # (zero-padded to the order of magnitude of the stimulus count)
    oom = utils.orderOfMagnitude(len(stimuliListsOfLists[0]))
    stimID = "s%%0%dd" % (oom + 1)
    stimuliList = [
        "%s,%s" % (stimID % i, row)
        for i, row in enumerate(stimuliListsOfLists[0])
    ]

    addSequenceInfo = len(tOrderListOfLists) > 0
    if addSequenceInfo:
        # Add sequence information to each row
        tResponseDataList = [
            list(row) + list(sequenceInfo) for row, sequenceInfo in
            utils.safeZip([tResponseDataList, tOrderListOfLists], True)
        ]

    # Aggregate the stimuli and the responses in rows
    tResponseDataList = [list(row) for row in tResponseDataList]
    outputList = [[
        header,
    ] + list(row) for header, row in utils.safeZip(
        [stimuliList, tResponseDataList], True)]

    # Add the column heading rows
    # First row in unanonymized user names; Second row is anonymized
    numArgs = stimuliList[0].count(",")
    rowOne, rowTwo = _buildHeader(fnList, numArgs, pageName, addSequenceInfo)
    outputList = [
        rowOne,
        rowTwo,
    ] + outputList

    outputTxt = u"\n".join([",".join(row) for row in outputList])

    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

    # Output a template users can fill in to auto score the results
    name = pageName + "_answer_template.csv"
    answersFN = join(outputPath, name)
    if os.path.exists(answersFN):
        print("Response template '%s' already exists. Not overwriting." %
              name)
    else:
        outputTxt = u"\n".join(stimuliList)
        with io.open(answersFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
def transposeSurvey(path, surveyFullPathList, outputPath):
    """Transpose survey responses: one csv per survey/stimulus id.

    Builds two header rows per survey (question titles and widget labels)
    from the survey definition files in *surveyFullPathList*, appends one
    data row per subject csv found in *path*, transposes the result, adds
    a per-question "Total" column, and writes "<stimulusID>.csv" files to
    *outputPath*.

    Returns (returnDict, idKeyList, aspectKeyList).
    NOTE(review): idKeyList is a dict-keys view, not a list, and
    aspectKeyList is always empty here -- presumably kept for interface
    parity with the other transpose modules; confirm with callers.
    """
    utils.makeDir(outputPath)

    surveyDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        surveyDataList.append(user_response.loadUserResponse(join(path, fn)))

    aspectKeyList = []

    # Load the data
    returnDict = {}
    defaultDict = {}
    for surveyFN in surveyFullPathList:
        fn = os.path.split(surveyFN)[1]
        surveyName = os.path.splitext(fn)[0]

        # Per-survey header rows: question titles and widget labels
        questionTitleDataList = []
        surveyQuestionDataList = []
        surveyItemList = survey.parseSurveyFile(surveyFN)
        for surveyItem in surveyItemList:
            for widgetType, widgetTextList in surveyItem.widgetList:
                if widgetType == "None":
                    continue
                # Free-form widgets get a single blank label cell
                if widgetType in ["Multiline_Textbox", "Sliding_Scale"]:
                    widgetTextList = ["", ]
                # Pad the title row so both rows stay column-aligned
                blankTxt = ["", ] * (len(widgetTextList) - 1)
                # Removing commas b/c we're using csv files
                surveyQuestion = surveyItem.text.replace(",", "")
                questionTitleDataList.extend([surveyQuestion, ] + blankTxt)
                if len(widgetTextList) == 0:
                    surveyQuestionDataList.extend(["", ])
                else:
                    surveyQuestionDataList.extend(widgetTextList)

        defaultDict.setdefault(surveyName, [])
        defaultDict[surveyName].append(questionTitleDataList)
        defaultDict[surveyName].append(surveyQuestionDataList)

    # Append each subject's answers under the matching survey's headers.
    # NOTE(review): returnDict values alias defaultDict's lists, and the
    # stimulus id is assumed to equal the survey file's base name -- the
    # defaultDict[stimuliID] lookup raises KeyError otherwise; confirm.
    for fn, userDataList in utils.safeZip([fnList, surveyDataList], True):
        for dataTuple in userDataList:
            # taskName, stimuliArgList, argTxt, dataTxt = dataTuple
            stimuliArgList = dataTuple[1]
            stimuliID = stimuliArgList[0]
            dataTxt = dataTuple[3]
            returnDict.setdefault(stimuliID, defaultDict[stimuliID])
            dataList = dataTxt.split(",")
            returnDict[stimuliID].append(dataList)

    idKeyList = returnDict.keys()

    # Transpose the data
    for stimulusID in idKeyList:
        returnDict[stimulusID] = [list(subTuple) for subTuple
                                  in utils.safeZip(returnDict[stimulusID],
                                                   enforceLength=True)]

        # Add a summation column
        # (blank answers count as 0; any non-numeric answer yields '-')
        newData = []
        for row in returnDict[stimulusID]:
            try:
                total = str(sum([int(val) if val != '' else 0
                                 for val in row[2:]]))
            except ValueError:
                total = '-'
            newData.append(row[:2] + [total, ] + row[2:])
        returnDict[stimulusID] = newData

        mainSurveyData = [",".join(subList) for subList
                          in returnDict[stimulusID]]

        outputTxtList = [",".join(["", "", "Total", ] + fnList), ]
        outputTxtList += mainSurveyData
        outputTxt = "\n".join(outputTxtList)

        outputFN = join(outputPath, stimulusID + ".csv")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)

    return returnDict, idKeyList, aspectKeyList