예제 #1
0
def printTestDuration(path):
    allTime = []
    for fn in utils.findFiles(path, filterExt=".csv"):
        timeStrList = [rowTuple[2].split(",")[-2] for rowTuple in
                       user_response.loadUserResponse(join(path, fn))]
        timeList = []
        for timeStamp in timeStrList:
            try:
                minutes, seconds = timeStamp.split(':')
            except ValueError:
                continue
            seconds = int(minutes) * 60 + float(seconds)
            minutes = seconds / 60.0
            timeList.append(minutes)
            
        totalTime = sum(timeList)
        allTime.append(totalTime)
        
        print("%s, %f" % (fn, totalTime))
    
    meanTime = sum(allTime) / len(allTime)
    print("Mean: %f" % meanTime)
    
    timeDeviationList = [(time - meanTime) ** 2 for time in allTime]
    stDev = math.sqrt(sum(timeDeviationList) / len(allTime))
    print("Standard Deviation: %f" % stDev)
예제 #2
0
def postProcessResults(testName, sequenceFN, removeDuplicatesFlag,
                       removeItemList=None):
    
    rootPath = join(constants.rootDir, "tests", testName)
    txtPath = join(rootPath, "txt")
    tmpSequence = sequence.TestSequence(None, join(rootPath, sequenceFN))
    fullPath = join(rootPath, "output", tmpSequence.sequenceTitle)
    pathToData = fullPath
    
    if removeDuplicatesFlag is True:
        removeDuplicates(pathToData, True)
        pathToData = join(pathToData, "duplicates_removed")
    else:
        newPathToData = join(pathToData, "duplicates_not_removed")
        utils.makeDir(newPathToData)
        for fn in utils.findFiles(pathToData, filterExt=".csv"):
            shutil.copy(join(pathToData, fn), join(newPathToData, fn))
        pathToData = newPathToData
    
    outputPath = pathToData + "_results"
    
    userResponseList = []
    fnList = utils.findFiles(pathToData, filterExt=".csv")
    for fn in fnList:
        fullPath = join(pathToData, fn)
        userResponseList.append(user_response.loadUserResponse(fullPath))
    
    # Don't continue if files are of different lengths
    testLen = len(userResponseList[0])
    if not all([len(response) == testLen for response in userResponseList]):
        print("ERROR: Not all responses in folder %s are the same length"
              % pathToData)
        countDict = {}
        for fn, response in utils.safeZip([fnList, userResponseList], True):
            countDict.setdefault(len(response), [])
            countDict[len(response)].append(fn)
            
        keyList = list(countDict.keys())
        keyList.sort()
        for numLines in keyList:
            print("%d lines - %s" % (numLines, str(countDict[numLines])))
        exit(0)
    
    # Don't continue if pages are different
    pageNameList = [[(pageTuple[0], pageTuple[1]) for pageTuple in response]
                    for response in userResponseList]
    sameList = []
    fnListOfLists = []
    for fn, pageList in utils.safeZip([fnList, pageNameList], True):
        i = 0
        while True:
            if len(sameList) == i:
                sameList.append(pageList)
                fnListOfLists.append([])
            else:
                if sameList[i] == pageList:
                    fnListOfLists[i].append(fn)
                    break
                else:
                    i += 1
    
    if len(sameList) == 0:
        print("ERROR: There don't appear to be any test data in folder %s"
              % pathToData)
        exit(0)
        
    if len(sameList) != 1:
        print("ERROR: User data doesn't agree.  Filenames printed on "
              "different lines differ in their pages.")
        
        for subFNList in fnListOfLists:
            print(", ".join(subFNList))
            
    # Extract the different tests users completed
    uniquePageList = []
    for pageTuple in pageNameList[0]:
        pageName = pageTuple[0]
        if pageName not in uniquePageList:
            uniquePageList.append(pageName)
    
    extractFromTest(pathToData, uniquePageList, removeItemList)
    
    # Transpose the surveys
    if "survey" in uniquePageList:
        surveyNameList = []
        for pageName, stimuliArgList in pageNameList[0]:
            if pageName == "survey":
                surveyName = stimuliArgList[0]
                surveyNameList.append(join(rootPath, surveyName + '.txt'))
        
        transpose_survey.transposeSurvey(join(pathToData, "survey"),
                                         surveyNameList, outputPath)
     
    # Transpose the rpt pages
    prominencePageList = ["prominence", "boundary", "boundary_and_prominence",
                          "syllable_marking"]
    for pageName in prominencePageList:
        if pageName in uniquePageList:
            transpose_rpt.transposeRPT(join(pathToData, pageName),
                                       txtPath, pageName, outputPath)
            
    choicePageList = ["media_choice", ]
    for pageName in choicePageList:
        if pageName in uniquePageList:
            transpose_choice.transposeChoice(join(pathToData, pageName),
                                             pageName,
                                             outputPath)
예제 #3
0
def transposeChoice(path, pageName, outputPath):
    
    utils.makeDir(outputPath)
    
    # Load response data
    responseDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        a = user_response.loadUserResponse(join(path, fn))
        responseDataList.append(a)
    
    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, stimuliListsOfLists, orderListOfLists = parsedTuple
    
    # Convert response to single answer
    tmpUserResponse = []
    for userDataList in responseDataList:
        # Get user response
        userResponse = [str(responseTuple[3].split(',').index('1'))
                        for responseTuple in userDataList]
        tmpUserResponse.append(userResponse)
    
    responseDataList = tmpUserResponse

    # Verify that all responses have the same list of stimuli
    assert(all([stimuliListsOfLists[0] == header
                for header in stimuliListsOfLists]))
    
    # Transpose data
    tResponseDataList = [row for row in utils.safeZip(responseDataList, True)]
    tOrderListOfLists = []
    if len(orderListOfLists) > 0:
        tOrderListOfLists = [row for row
                             in utils.safeZip(orderListOfLists, True)]
    
    # Add a unique id to each row
    oom = utils.orderOfMagnitude(len(stimuliListsOfLists[0]))
    stimID = "s%%0%dd" % (oom + 1)
    stimuliList = ["%s,%s" % (stimID % i, row)
                   for i, row in enumerate(stimuliListsOfLists[0])]
    
    addSequenceInfo = len(tOrderListOfLists) > 0
    if addSequenceInfo:  # Add sequence information to each row
        tResponseDataList = [list(row) + list(sequenceInfo)
                             for row, sequenceInfo
                             in utils.safeZip([tResponseDataList,
                                               tOrderListOfLists], True)]

    # Aggregate the stimuli and the responses in rows
    tResponseDataList = [list(row)
                         for row
                         in tResponseDataList]
    outputList = [[header, ] + list(row)
                  for header, row
                  in utils.safeZip([stimuliList, tResponseDataList], True)]
    
    # Add the column heading rows
    # First row in unanonymized user names; Second row is anonymized
    numArgs = stimuliList[0].count(",")
    rowOne, rowTwo = _buildHeader(fnList, numArgs, pageName, addSequenceInfo)
    outputList = [rowOne, rowTwo, ] + outputList
    
    outputTxt = u"\n".join([",".join(row) for row in outputList])
    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

    # Output a template users can fill in to auto score the results
    name = pageName + "_answer_template.csv"
    answersFN = join(outputPath, name)
    if os.path.exists(answersFN):
        print("Response template '%s' already exists.  Not overwriting."
              % name)
    else:
        outputTxt = u"\n".join(stimuliList)
        with io.open(answersFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
예제 #4
0
def transposeRPT(path, txtPath, pageName, outputPath):
    '''
    Transposes RPT data
    
    Input files: one file per subject
    Output files: one file per stimuli
    '''
    utils.makeDir(outputPath)

    # Load response data
    responseDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        a = user_response.loadUserResponse(join(path, fn))
        responseDataList.append(a)
    
    # Load the demarcator, if there is one
    # and load the order info if present
    demarcator = None
    pageName, pageArgs, _, _ = responseDataList[0][0]
    if pageName == "syllable_marking":
        
        # The demarcator can either be an arg or a keyword arg.
        # Either way, it should be the last item in the list
        demarcator = pageArgs[-1]
        if "syllableDemarcator" in demarcator:
            demarcator = demarcator.split("=")[1]
    
    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, _, orderListOfLists = parsedTuple
    orderList = []
    if len(orderListOfLists) > 0:
        orderList = [",".join(row) for row
                     in utils.safeZip(orderListOfLists, True)]
    
    # Load Words
    txtDict = {}
    for fn in utils.findFiles(txtPath, filterExt=".txt"):
        name = os.path.splitext(fn)[0]
        txtList = loader.loadTxtFile(join(txtPath, fn))
        
        txtList = [tmpTxt.replace(" ", ",") for tmpTxt in txtList]
        
        # Remove HTML tags
        txtList = [word for word in txtList if "<" not in word]
        
        txt = ",".join(txtList)
        
        if demarcator is None:
            txtDict[name] = [word for word in txt.split(",") if word != ""]
        else:
            txtDict[name] = [syllable for word in txt.split(",") if word != ""
                             for syllable in word.split(demarcator)]
    
    returnDict, idKeyList = _transposeRPT(responseDataList)
    
    doUserSeqHeader = len(orderListOfLists) > 0
    headerRow, anonHeaderRow = _buildHeader(fnList, pageName,
                                            doUserSeqHeader,
                                            idKeyList[0])
    
    # Format the output rpt scores
    aggrOutputList = [headerRow, anonHeaderRow]
    for i in range(len(idKeyList)):
        
        stimulusID = idKeyList[i]

        wordList = txtDict[stimulusID.split(",")[2]]
        stimulusIDList = [stimulusID for _ in wordList]
        aspectSumList = [stimulusIDList, wordList, ]
        aspectList = []

        try:
            bScoreList, bSumList = _getScores(returnDict[stimulusID],
                                              B)
        except KeyError:
            pass
        try:
            pScoreList, pSumList = _getScores(returnDict[stimulusID],
                                              P)
        except KeyError:
            pass
        
        if pageName == "boundary":
            aspectSumList.extend([bSumList, ])
            aspectList.extend([bScoreList, ])
        elif pageName in ["prominence", "syllable_marking"]:
            aspectSumList.extend([pSumList, ])
            aspectList.extend([pScoreList, ])
        elif pageName == "boundary_and_prominence":
            aspectSumList.extend([bSumList, pSumList, ])
            aspectList.extend([bScoreList, pScoreList, ])
    
        # Extend header with sequence order information
        if doUserSeqHeader:
            orderStr = orderList[i]
            numAnnotators = range(max([len(bSumList), len(pSumList)]))
            tmpOrderList = [orderStr for _ in numAnnotators]
            aspectList.extend([tmpOrderList, ])
            
        dataList = aspectSumList + aspectList
        combinedList = [_unifyRow(row) for row in
                        utils.safeZip(dataList, enforceLength=True)]
        aggrOutputList.extend([",".join(row) for row in combinedList])
        
    outputTxt = "\n".join(aggrOutputList)
    
    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)
예제 #5
0
def transposeRPT(path, txtPath, pageName, outputPath):
    '''
    Transposes RPT data
    
    Input files: one file per subject
    Output files: one file per stimuli
    '''
    utils.makeDir(outputPath)

    # Load response data
    responseDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        a = user_response.loadUserResponse(join(path, fn))
        responseDataList.append(a)

    # Load the demarcator, if there is one
    # and load the order info if present
    demarcator = None
    pageName, pageArgs, _, _ = responseDataList[0][0]
    if pageName == "syllable_marking":

        # The demarcator can either be an arg or a keyword arg.
        # Either way, it should be the last item in the list
        demarcator = pageArgs[-1]
        if "syllableDemarcator" in demarcator:
            demarcator = demarcator.split("=")[1]

    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, _, orderListOfLists = parsedTuple
    orderList = []
    if len(orderListOfLists) > 0:
        orderList = [
            ",".join(row) for row in utils.safeZip(orderListOfLists, True)
        ]

    # Load Words
    txtDict = {}
    for fn in utils.findFiles(txtPath, filterExt=".txt"):
        name = os.path.splitext(fn)[0]
        txtList = loader.loadTxtFile(join(txtPath, fn))

        txtList = [tmpTxt.replace(" ", ",") for tmpTxt in txtList]

        # Remove HTML tags
        txtList = [word for word in txtList if "<" not in word]

        txt = ",".join(txtList)

        if demarcator is None:
            txtDict[name] = [word for word in txt.split(",") if word != ""]
        else:
            txtDict[name] = [
                syllable for word in txt.split(",") if word != ""
                for syllable in word.split(demarcator)
            ]

    returnDict, idKeyList = _transposeRPT(responseDataList)

    doUserSeqHeader = len(orderListOfLists) > 0
    headerRow, anonHeaderRow = _buildHeader(fnList, pageName, doUserSeqHeader,
                                            idKeyList[0])

    # Format the output rpt scores
    aggrOutputList = [headerRow, anonHeaderRow]
    for i in range(len(idKeyList)):

        stimulusID = idKeyList[i]

        wordList = txtDict[stimulusID.split(",")[2]]
        stimulusIDList = [stimulusID for _ in wordList]
        aspectSumList = [
            stimulusIDList,
            wordList,
        ]
        aspectList = []

        try:
            bScoreList, bSumList = _getScores(returnDict[stimulusID], B)
        except KeyError:
            pass
        try:
            pScoreList, pSumList = _getScores(returnDict[stimulusID], P)
        except KeyError:
            pass

        if pageName == "boundary":
            aspectSumList.extend([
                bSumList,
            ])
            aspectList.extend([
                bScoreList,
            ])
        elif pageName in ["prominence", "syllable_marking"]:
            aspectSumList.extend([
                pSumList,
            ])
            aspectList.extend([
                pScoreList,
            ])
        elif pageName == "boundary_and_prominence":
            aspectSumList.extend([
                bSumList,
                pSumList,
            ])
            aspectList.extend([
                bScoreList,
                pScoreList,
            ])

        # Extend header with sequence order information
        if doUserSeqHeader:
            orderStr = orderList[i]
            numAnnotators = range(max([len(bSumList), len(pSumList)]))
            tmpOrderList = [orderStr for _ in numAnnotators]
            aspectList.extend([
                tmpOrderList,
            ])

        dataList = aspectSumList + aspectList
        combinedList = [
            _unifyRow(row)
            for row in utils.safeZip(dataList, enforceLength=True)
        ]
        aggrOutputList.extend([",".join(row) for row in combinedList])

    outputTxt = "\n".join(aggrOutputList)

    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)
예제 #6
0
def transposeChoice(path, pageName, outputPath):

    utils.makeDir(outputPath)

    # Load response data
    responseDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        a = user_response.loadUserResponse(join(path, fn))
        responseDataList.append(a)

    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, stimuliListsOfLists, orderListOfLists = parsedTuple

    # Convert response to single answer
    tmpUserResponse = []
    for userDataList in responseDataList:
        # Get user response
        userResponse = [
            str(responseTuple[3].split(',').index('1'))
            for responseTuple in userDataList
        ]
        tmpUserResponse.append(userResponse)

    responseDataList = tmpUserResponse

    # Verify that all responses have the same list of stimuli
    assert (all(
        [stimuliListsOfLists[0] == header for header in stimuliListsOfLists]))

    # Transpose data
    tResponseDataList = [row for row in utils.safeZip(responseDataList, True)]
    tOrderListOfLists = []
    if len(orderListOfLists) > 0:
        tOrderListOfLists = [
            row for row in utils.safeZip(orderListOfLists, True)
        ]

    # Add a unique id to each row
    oom = utils.orderOfMagnitude(len(stimuliListsOfLists[0]))
    stimID = "s%%0%dd" % (oom + 1)
    stimuliList = [
        "%s,%s" % (stimID % i, row)
        for i, row in enumerate(stimuliListsOfLists[0])
    ]

    addSequenceInfo = len(tOrderListOfLists) > 0
    if addSequenceInfo:  # Add sequence information to each row
        tResponseDataList = [
            list(row) + list(sequenceInfo) for row, sequenceInfo in
            utils.safeZip([tResponseDataList, tOrderListOfLists], True)
        ]

    # Aggregate the stimuli and the responses in rows
    tResponseDataList = [list(row) for row in tResponseDataList]
    outputList = [[
        header,
    ] + list(row) for header, row in utils.safeZip(
        [stimuliList, tResponseDataList], True)]

    # Add the column heading rows
    # First row in unanonymized user names; Second row is anonymized
    numArgs = stimuliList[0].count(",")
    rowOne, rowTwo = _buildHeader(fnList, numArgs, pageName, addSequenceInfo)
    outputList = [
        rowOne,
        rowTwo,
    ] + outputList

    outputTxt = u"\n".join([",".join(row) for row in outputList])
    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

    # Output a template users can fill in to auto score the results
    name = pageName + "_answer_template.csv"
    answersFN = join(outputPath, name)
    if os.path.exists(answersFN):
        print("Response template '%s' already exists.  Not overwriting." %
              name)
    else:
        outputTxt = u"\n".join(stimuliList)
        with io.open(answersFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
예제 #7
0
def transposeSurvey(path, surveyFullPathList, outputPath):
    utils.makeDir(outputPath)
    
    surveyDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        surveyDataList.append(user_response.loadUserResponse(join(path, fn)))
    
    aspectKeyList = []
    
    # Load the data
    returnDict = {}
   
    defaultDict = {}
    for surveyFN in surveyFullPathList:
        fn = os.path.split(surveyFN)[1]
        surveyName = os.path.splitext(fn)[0]
        
        questionTitleDataList = []
        surveyQuestionDataList = []
        surveyItemList = survey.parseSurveyFile(surveyFN)
        for surveyItem in surveyItemList:
            for widgetType, widgetTextList in surveyItem.widgetList:
                if widgetType == "None":
                    continue
                if widgetType in ["Multiline_Textbox", "Sliding_Scale"]:
                    widgetTextList = ["", ]
                blankTxt = ["", ] * (len(widgetTextList) - 1)
                # Removing commas b/c we're using csv files
                surveyQuestion = surveyItem.text.replace(",", "")
                questionTitleDataList.extend([surveyQuestion, ] + blankTxt)
                if len(widgetTextList) == 0:
                    surveyQuestionDataList.extend(["", ])
                else:
                    surveyQuestionDataList.extend(widgetTextList)
        
        defaultDict.setdefault(surveyName, [])
        defaultDict[surveyName].append(questionTitleDataList)
        defaultDict[surveyName].append(surveyQuestionDataList)
    
    for fn, userDataList in utils.safeZip([fnList, surveyDataList], True):
        
        for dataTuple in userDataList:
            # taskName, stimuliArgList, argTxt, dataTxt = dataTuple
            stimuliArgList = dataTuple[1]
            stimuliID = stimuliArgList[0]
            dataTxt = dataTuple[3]
            
            returnDict.setdefault(stimuliID, defaultDict[stimuliID])
            
            dataList = dataTxt.split(",")
            returnDict[stimuliID].append(dataList)
    
    idKeyList = returnDict.keys()
    
    # Transpose the data
    for stimulusID in idKeyList:
        returnDict[stimulusID] = [list(subTuple) for subTuple in
                                  utils.safeZip(returnDict[stimulusID],
                                                enforceLength=True)]
        
        # Add a summation column
        newData = []
        for row in returnDict[stimulusID]:
            
            try:
                total = str(sum([int(val) if val != '' else 0
                                 for val in row[2:]]))
            except ValueError:
                total = '-'
            newData.append(row[:2] + [total, ] + row[2:])
        returnDict[stimulusID] = newData
        
        mainSurveyData = [",".join(subList) for subList in
                          returnDict[stimulusID]]
        
        outputTxtList = [",".join(["", "", "Total", ] + fnList), ]
        outputTxtList += mainSurveyData
        
        outputTxt = "\n".join(outputTxtList)
        outputFN = join(outputPath, stimulusID + ".csv")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
            
    return returnDict, idKeyList, aspectKeyList