Python safeZip Examples, lmeds.utilities.utils.safeZip Python Examples

Example #1

0

Show file

def markCorrect(inputFN, correctionFN, outputFN, evalFunc=None):
    '''
    Converts user responses into a binary--right or wrong--answer

    Every response value becomes "1" when evalFunc(response, answer) is
    truthy and "0" otherwise.  The scored table is written to outputFN.
    A confusion matrix file (one matrix generated without the percent
    flag, a blank line, then one generated with it) is written next to
    it, using outputFN's base name plus "_confusion_matrix.csv".

    inputFN - transposed response file, loaded with _parseTransposed
    correctionFN - file holding the expected answer for every row
    outputFN - where the scored table is written
    evalFunc - two-argument comparison; plain equality when omitted
    '''

    if evalFunc is None:
        def evalFunc(given, expected):
            return given == expected

    # Load
    headerRows, responseRows = _parseTransposed(inputFN, False)
    answerRows = _parseTransposed(correctionFN, True)[1]

    # Scored rows are appended after the header rows (same list object)
    scoredRows = headerRows
    pairedRows = utils.safeZip([responseRows, answerRows], True)
    for responseEntry, answerEntry in pairedRows:
        # Both files must be describing the same row
        assert (responseEntry[0] == answerEntry[0])

        givenValues = responseEntry[1]
        expected = answerEntry[1]

        flags = []
        for given in givenValues:
            if evalFunc(given, expected):
                flags.append("1")
            else:
                flags.append("0")

        scoredRows.append(responseEntry[0] + flags)

    scoredLines = []
    for row in scoredRows:
        cells = [transpose_utils.recListToStr(item) for item in row]
        scoredLines.append(",".join(cells))
    scoredTxt = "\n".join(scoredLines)
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(scoredTxt)

    # Generate confusion matrix
    givenColumn = [entry[1] for entry in responseRows]
    expectedColumn = [entry[1] for entry in answerRows]
    countMatrix = _generateConfusionMatrix(expectedColumn, givenColumn,
                                           False)
    percentMatrix = _generateConfusionMatrix(expectedColumn, givenColumn,
                                             True)

    # The empty string becomes a blank separator line between the matrices
    matrixRows = countMatrix + ["", ] + percentMatrix
    matrixTxt = "\n".join([",".join(row) for row in matrixRows])

    matrixOutputFN = os.path.splitext(outputFN)[0] + "_confusion_matrix.csv"
    with io.open(matrixOutputFN, "w", encoding="utf-8") as fd:
        fd.write(matrixTxt)

Example #2

0

Show file

def agglutinateSpreadsheets(csvFNList, outputFN):
    '''
    Joins several csv files side by side into a single csv file

    Line i of the output is line i of every input file (newline
    characters removed), joined with commas in the order of csvFNList.
    The output always ends with a trailing newline.

    NOTE(review): the lines are combined with
    utils.safeZip(..., enforceLength=True); presumably this rejects
    inputs with differing line counts -- confirm against safeZip.
    '''

    def _readLines(fn):
        with io.open(fn, "r", encoding="utf-8") as fd:
            return fd.readlines()

    linesPerFile = [_readLines(fn) for fn in csvFNList]

    mergedRows = []
    for lineTuple in utils.safeZip(linesPerFile, enforceLength=True):
        cells = [line.replace("\n", "") for line in lineTuple]
        mergedRows.append(",".join(cells))

    mergedTxt = "\n".join(mergedRows) + "\n"
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(mergedTxt)

Example #3

0

Show file

File: transpose_choice.py Project: timmahrt/LMEDS

def markCorrect(inputFN, correctionFN, outputFN, evalFunc=None):
    '''
    Converts user responses into a binary--right or wrong--answer

    The responses in inputFN are compared, row by row, with the answers
    in correctionFN.  A response is marked "1" when
    evalFunc(response, answer) is truthy and "0" otherwise; equality is
    used when no evalFunc is given.

    Two files are written: the marked table (outputFN) and a confusion
    matrix file whose name is outputFN without its extension plus
    "_confusion_matrix.csv".
    '''

    compare = evalFunc
    if compare is None:
        compare = lambda x, y: x == y

    # Load
    headerList, responseList = _parseTransposed(inputFN, False)
    answerList = _parseTransposed(correctionFN, True)[1]

    # The marked rows are added to the end of the header rows
    markedList = headerList
    rowPairs = utils.safeZip([responseList, answerList], True)
    for responseTuple, answerTuple in rowPairs:
        rowLabel = responseTuple[0]
        # The response row and the answer row must carry the same label
        assert(rowLabel == answerTuple[0])

        userResponses = responseTuple[1]
        answer = answerTuple[1]
        markedRow = []
        for val in userResponses:
            markedRow.append("1" if compare(val, answer) else "0")

        markedList.append(rowLabel + markedRow)

    outputLines = []
    for row in markedList:
        strRow = [transpose_utils.recListToStr(item) for item in row]
        outputLines.append(",".join(strRow))
    outputTxt = "\n".join(outputLines)
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

    # Generate confusion matrix
    responseValList = [rTuple[1] for rTuple in responseList]
    answerValList = [aTuple[1] for aTuple in answerList]
    matrixList = []
    for percentFlag in (False, True):
        matrixList.append(_generateConfusionMatrix(answerValList,
                                                   responseValList,
                                                   percentFlag))

    # Count matrix, a blank line, then the percent matrix
    combinedMatrix = matrixList[0] + ["", ] + matrixList[1]

    matrixOutputFN = os.path.splitext(outputFN)[0] + "_confusion_matrix.csv"
    matrixTxt = "\n".join([",".join(row) for row in combinedMatrix])
    with io.open(matrixOutputFN, "w", encoding="utf-8") as fd:
        fd.write(matrixTxt)

Example #4

0

Show file

def _generateConfusionMatrix(correctList, responseList, percentFlag):
    '''
    Builds a confusion matrix as a list of rows of strings

    correctList - one expected answer per item
    responseList - one list of responses per item, aligned with correctList
    percentFlag - if truthy, each cell is divided by the number of
                  responses counted for its row; otherwise cells hold
                  raw counts

    The first row is a header: an empty cell followed by the sorted set
    of all values seen in either input.  Every following row starts with
    the expected answer and holds one "%0.2f"-formatted cell per value.
    '''

    # Every value seen as an answer or as a response gets a row and column
    allResponses = [val for sublist in responseList for val in sublist]
    keys = sorted(set(allResponses + correctList))

    counts = {rowKey: {colKey: 0 for colKey in keys} for rowKey in keys}
    rowTotals = {rowKey: 0 for rowKey in keys}

    # Sum values
    for answer, responses in utils.safeZip([correctList, responseList], True):
        for response in responses:
            counts[answer][response] += 1
            rowTotals[answer] += 1

    # Generate confusion matrix
    matrix = [[""] + keys]
    for rowKey in keys:
        total = rowTotals[rowKey]
        cells = [rowKey]
        for colKey in keys:
            cell = counts[rowKey][colKey]
            if percentFlag:
                # A row with no responses is reported as all zeros
                cell = cell / float(total) if total != 0 else 0
            cells.append("%0.2f" % (cell))
        matrix.append(cells)

    return matrix

Example #5

0

Show file

File: transpose_choice.py Project: timmahrt/LMEDS

def _generateConfusionMatrix(correctList, responseList, percentFlag):
    '''
    Tabulates how often each response was given for each correct answer

    correctList and responseList are walked in parallel; responseList
    holds a list of responses for every entry of correctList.  The
    result is a list of rows: a header row (an empty cell plus the
    sorted labels) followed by one row per label.  All cells are
    formatted with "%0.2f".  When percentFlag is truthy the cells of a
    row are divided by that row's total number of responses.
    '''

    # Initialize dictionary
    labelSet = set(correctList)
    for sublist in responseList:
        labelSet.update(sublist)
    keyList = list(labelSet)
    keyList.sort()

    confusionDict = {}
    sumDict = {}
    for rowKey in keyList:
        confusionDict[rowKey] = dict((colKey, 0) for colKey in keyList)
        sumDict[rowKey] = 0

    # Sum values
    for answer, responses in utils.safeZip([correctList, responseList], True):
        rowDict = confusionDict[answer]
        for response in responses:
            rowDict[response] += 1
            sumDict[answer] += 1

    def _formatCell(rowKey, colKey):
        value = confusionDict[rowKey][colKey]
        if percentFlag:
            try:
                value = value / float(sumDict[rowKey])
            except ZeroDivisionError:
                # No responses were recorded for this row
                value = 0
        return "%0.2f" % (value)

    # Generate confusion matrix
    outputList = [["", ] + keyList, ]
    for rowKey in keyList:
        rowCells = [_formatCell(rowKey, colKey) for colKey in keyList]
        outputList.append([rowKey, ] + rowCells)

    return outputList

Example #6

0

Show file

def postProcessResults(testName, sequenceFN, removeDuplicatesFlag,
                       removeItemList=None):
    '''
    Validates the user output of one test and transposes it per page type

    testName - folder name of the test under <constants.rootDir>/tests
    sequenceFN - sequence file inside that folder; its sequenceTitle
                 selects the output folder to process
    removeDuplicatesFlag - if True, removeDuplicates() is run first and
                 the "duplicates_removed" subfolder is processed;
                 otherwise the csv files are copied into
                 "duplicates_not_removed" and that copy is processed
    removeItemList - passed through to extractFromTest()

    The process exits (status 0, as the script always has) when the
    data folder has no responses or when the responses differ in length.
    '''

    rootPath = join(constants.rootDir, "tests", testName)
    txtPath = join(rootPath, "txt")
    tmpSequence = sequence.TestSequence(None, join(rootPath, sequenceFN))
    pathToData = join(rootPath, "output", tmpSequence.sequenceTitle)

    if removeDuplicatesFlag is True:
        removeDuplicates(pathToData, True)
        pathToData = join(pathToData, "duplicates_removed")
    else:
        newPathToData = join(pathToData, "duplicates_not_removed")
        utils.makeDir(newPathToData)
        for fn in utils.findFiles(pathToData, filterExt=".csv"):
            shutil.copy(join(pathToData, fn), join(newPathToData, fn))
        pathToData = newPathToData

    outputPath = pathToData + "_results"

    fnList = utils.findFiles(pathToData, filterExt=".csv")
    userResponseList = [user_response.loadUserResponse(join(pathToData, fn))
                        for fn in fnList]

    # Don't continue if there is nothing to process.  This must be checked
    # before userResponseList[0] is touched below; otherwise an empty
    # folder raises IndexError instead of printing this message.
    if len(userResponseList) == 0:
        print("ERROR: There don't appear to be any test data in folder %s"
              % pathToData)
        exit(0)

    # Don't continue if files are of different lengths
    testLen = len(userResponseList[0])
    if not all(len(response) == testLen for response in userResponseList):
        print("ERROR: Not all responses in folder %s are the same length"
              % pathToData)
        # Group the file names by their number of lines for the report
        countDict = {}
        for fn, response in utils.safeZip([fnList, userResponseList], True):
            countDict.setdefault(len(response), []).append(fn)

        for numLines in sorted(countDict.keys()):
            print("%d lines - %s" % (numLines, str(countDict[numLines])))
        exit(0)

    # Group the files by the (page name, page args) sequence they contain
    pageNameList = [[(pageTuple[0], pageTuple[1]) for pageTuple in response]
                    for response in userResponseList]
    sameList = []
    fnListOfLists = []
    for fn, pageList in utils.safeZip([fnList, pageNameList], True):
        for i, knownPageList in enumerate(sameList):
            if knownPageList == pageList:
                fnListOfLists[i].append(fn)
                break
        else:
            # First file with this page sequence; start a new group
            sameList.append(pageList)
            fnListOfLists.append([fn, ])

    # NOTE(review): a mismatch is only reported; processing continues
    # using the pages of the first file, as the original code did.
    if len(sameList) != 1:
        print("ERROR: User data doesn't agree.  Filenames printed on "
              "different lines differ in their pages.")

        for subFNList in fnListOfLists:
            print(", ".join(subFNList))

    # Extract the different tests users completed (first-seen order)
    uniquePageList = []
    for pageTuple in pageNameList[0]:
        pageName = pageTuple[0]
        if pageName not in uniquePageList:
            uniquePageList.append(pageName)

    extractFromTest(pathToData, uniquePageList, removeItemList)

    # Transpose the surveys
    if "survey" in uniquePageList:
        # The first page argument of a survey page is the survey's name
        surveyNameList = [join(rootPath, stimuliArgList[0] + '.txt')
                          for pageName, stimuliArgList in pageNameList[0]
                          if pageName == "survey"]

        transpose_survey.transposeSurvey(join(pathToData, "survey"),
                                         surveyNameList, outputPath)

    # Transpose the rpt pages
    prominencePageList = ["prominence", "boundary", "boundary_and_prominence",
                          "syllable_marking"]
    for pageName in prominencePageList:
        if pageName in uniquePageList:
            transpose_rpt.transposeRPT(join(pathToData, pageName),
                                       txtPath, pageName, outputPath)

    choicePageList = ["media_choice", ]
    for pageName in choicePageList:
        if pageName in uniquePageList:
            transpose_choice.transposeChoice(join(pathToData, pageName),
                                             pageName,
                                             outputPath)

Example #7

0

Show file

File: transpose_choice.py Project: timmahrt/LMEDS

def transposeChoice(path, pageName, outputPath):
    '''
    Transposes choice-page responses: one row per stimulus

    Loads every csv file in path, reduces each response to the index of
    the first '1' in its comma-separated data field, and writes
    <outputPath>/<pageName>.csv with two header rows followed by one row
    per stimulus (stimulus id, stimulus, one column per user, and the
    sequence order columns when that information exists).

    An answer template, <pageName>_answer_template.csv, is also written
    unless a file of that name already exists in outputPath.
    '''

    utils.makeDir(outputPath)

    # Load response data
    fnList = utils.findFiles(path, filterExt=".csv")
    loadedList = [user_response.loadUserResponse(join(path, fn))
                  for fn in fnList]

    # Sort response if sequence order information is available
    (responseDataList,
     stimuliListsOfLists,
     orderListOfLists) = transpose_utils.parseResponse(loadedList)

    # Convert response to single answer
    choiceList = []
    for userDataList in responseDataList:
        userChoices = []
        for responseTuple in userDataList:
            flagList = responseTuple[3].split(',')
            userChoices.append(str(flagList.index('1')))
        choiceList.append(userChoices)

    # Verify that all responses have the same list of stimuli
    assert(all([stimuliListsOfLists[0] == header
                for header in stimuliListsOfLists]))

    # Transpose data (users x stimuli -> stimuli x users)
    tResponseDataList = list(utils.safeZip(choiceList, True))
    tOrderListOfLists = []
    if len(orderListOfLists) > 0:
        tOrderListOfLists = list(utils.safeZip(orderListOfLists, True))

    # Add a unique id to each row
    oom = utils.orderOfMagnitude(len(stimuliListsOfLists[0]))
    stimID = "s%%0%dd" % (oom + 1)
    stimuliList = []
    for i, row in enumerate(stimuliListsOfLists[0]):
        stimuliList.append("%s,%s" % (stimID % i, row))

    addSequenceInfo = len(tOrderListOfLists) > 0
    if addSequenceInfo:  # Add sequence information to each row
        rowPairs = utils.safeZip([tResponseDataList, tOrderListOfLists], True)
        tResponseDataList = [list(row) + list(sequenceInfo)
                             for row, sequenceInfo in rowPairs]

    # Aggregate the stimuli and the responses in rows
    tResponseDataList = [list(row) for row in tResponseDataList]
    outputList = []
    for header, row in utils.safeZip([stimuliList, tResponseDataList], True):
        outputList.append([header, ] + list(row))

    # Add the column heading rows
    # First row in unanonymized user names; Second row is anonymized
    numArgs = stimuliList[0].count(",")
    rowOne, rowTwo = _buildHeader(fnList, numArgs, pageName, addSequenceInfo)
    outputList = [rowOne, rowTwo, ] + outputList

    outputTxt = u"\n".join([",".join(row) for row in outputList])
    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

    # Output a template users can fill in to auto score the results
    name = pageName + "_answer_template.csv"
    answersFN = join(outputPath, name)
    if not os.path.exists(answersFN):
        outputTxt = u"\n".join(stimuliList)
        with io.open(answersFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)
    else:
        print("Response template '%s' already exists.  Not overwriting."
              % name)

Example #8

0

Show file

File: transpose_rpt.py Project: timmahrt/LMEDS

def transposeRPT(path, txtPath, pageName, outputPath):
    '''
    Transposes RPT data

    Input files: one file per subject
    Output files: one file per stimuli

    path - folder holding one response csv file per subject
    txtPath - folder holding the .txt transcripts, looked up by the third
              comma-separated field of each stimulus id
    pageName - NOTE: rebound below to the page name stored in the first
               response row, so the argument value itself is not used
    outputPath - folder that receives <pageName>.csv (created if needed)
    '''
    utils.makeDir(outputPath)

    # Load response data
    fnList = utils.findFiles(path, filterExt=".csv")
    responseDataList = [user_response.loadUserResponse(join(path, fn))
                        for fn in fnList]

    # Load the demarcator, if there is one
    # and load the order info if present
    demarcator = None
    pageName, pageArgs, _, _ = responseDataList[0][0]
    if pageName == "syllable_marking":

        # The demarcator can either be an arg or a keyword arg.
        # Either way, it should be the last item in the list
        demarcator = pageArgs[-1]
        if "syllableDemarcator" in demarcator:
            demarcator = demarcator.split("=")[1]

    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, _, orderListOfLists = parsedTuple
    orderList = []
    if len(orderListOfLists) > 0:
        orderList = [",".join(row) for row
                     in utils.safeZip(orderListOfLists, True)]

    # Load Words
    txtDict = {}
    for fn in utils.findFiles(txtPath, filterExt=".txt"):
        name = os.path.splitext(fn)[0]
        txtList = loader.loadTxtFile(join(txtPath, fn))

        txtList = [tmpTxt.replace(" ", ",") for tmpTxt in txtList]

        # Remove HTML tags (any line containing "<" is dropped entirely)
        txtList = [word for word in txtList if "<" not in word]

        wordList = [word for word in ",".join(txtList).split(",")
                    if word != ""]

        if demarcator is None:
            txtDict[name] = wordList
        else:
            txtDict[name] = [syllable for word in wordList
                             for syllable in word.split(demarcator)]

    returnDict, idKeyList = _transposeRPT(responseDataList)

    doUserSeqHeader = len(orderListOfLists) > 0
    headerRow, anonHeaderRow = _buildHeader(fnList, pageName,
                                            doUserSeqHeader,
                                            idKeyList[0])

    # Format the output rpt scores
    aggrOutputList = [headerRow, anonHeaderRow]
    for i, stimulusID in enumerate(idKeyList):

        wordList = txtDict[stimulusID.split(",")[2]]
        stimulusIDList = [stimulusID for _ in wordList]
        aspectSumList = [stimulusIDList, wordList, ]
        aspectList = []

        # Start every stimulus with empty scores.  Without this reset, a
        # KeyError below left the names unbound on the first stimulus and
        # silently reused the previous stimulus's scores on later ones.
        bScoreList, bSumList = [], []
        pScoreList, pSumList = [], []
        try:
            bScoreList, bSumList = _getScores(returnDict[stimulusID],
                                              B)
        except KeyError:
            pass
        try:
            pScoreList, pSumList = _getScores(returnDict[stimulusID],
                                              P)
        except KeyError:
            pass

        if pageName == "boundary":
            aspectSumList.extend([bSumList, ])
            aspectList.extend([bScoreList, ])
        elif pageName in ["prominence", "syllable_marking"]:
            aspectSumList.extend([pSumList, ])
            aspectList.extend([pScoreList, ])
        elif pageName == "boundary_and_prominence":
            aspectSumList.extend([bSumList, pSumList, ])
            aspectList.extend([bScoreList, pScoreList, ])

        # Extend header with sequence order information
        if doUserSeqHeader:
            orderStr = orderList[i]
            # One copy of the order string per entry of the longer sum list
            numRows = max([len(bSumList), len(pSumList)])
            aspectList.extend([[orderStr for _ in range(numRows)], ])

        dataList = aspectSumList + aspectList
        combinedList = [_unifyRow(row) for row in
                        utils.safeZip(dataList, enforceLength=True)]
        aggrOutputList.extend([",".join(row) for row in combinedList])

    outputTxt = "\n".join(aggrOutputList)

    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

Example #9

0

Show file

File: transpose_rpt.py Project: timmahrt/LMEDS

def _transposeRPT(dataListOfLists):
    '''
    Regroups per-user RPT scores by stimulus

    dataListOfLists - one list per user; every entry is a 4-tuple of
                      (taskName, stimuliArgList, <unused>, dataTxt) where
                      dataTxt is a comma-separated string of scores

    Returns (returnDict, idKeyList).  idKeyList holds the stimulus ids in
    the order they occur for the first user.  returnDict maps a stimulus
    id to {B: rows, P: rows}; for the ids in idKeyList the per-user score
    lists have been transposed with utils.safeZip.

    Raises ValueError for a task name that is not one of the RPT tasks
    (previously this surfaced as a TypeError from len(None)).
    '''

    idKeyList = []

    # Load the data
    returnDict = {}
    for userI, userDataList in enumerate(dataListOfLists):

        oom = utils.orderOfMagnitude(len(userDataList)) + 1
        stimTemplate = "s_%%0%dd," % oom
        for stimI, dataTuple in enumerate(userDataList, 1):
            taskName, stimuliArgList, _, dataTxt = dataTuple

            # Remove the sequence order variables from the stimuli arg list
            cutStimuliArgList = [arg for arg in stimuliArgList
                                 if 'orderSI=' not in arg
                                 and 'orderAI=' not in arg]

            stimuliID = stimTemplate % stimI + ','.join(cutStimuliArgList)

            scoreList = dataTxt.split(",")

            if taskName == 'boundary_and_prominence':
                # First half holds the boundary scores, second half the
                # prominence scores
                lenOfData = len(scoreList) // 2

                bScores = scoreList[:lenOfData]
                pScores = scoreList[lenOfData:]
            elif taskName == "boundary":
                bScores = scoreList
                pScores = []
            elif taskName in ["prominence", "syllable_marking"]:
                bScores = []
                pScores = scoreList
            else:
                raise ValueError("Unsupported RPT task name: %s" % taskName)

            # Only the first user defines the list of stimulus ids
            if userI == 0:
                idKeyList.append(stimuliID)

            stimulusDict = returnDict.setdefault(stimuliID, {})
            stimulusDict.setdefault(B, []).append(bScores)
            stimulusDict.setdefault(P, []).append(pScores)

    # Transpose the data
    # (both B and P exist and are non-empty for every id in idKeyList)
    for sid in idKeyList:
        for taskType in [B, P]:
            tmpList = returnDict[sid][taskType]
            try:
                zipped = utils.safeZip(tmpList,
                                       enforceLength=True)
            except Exception:
                print("Problem with score type: %s, SID: %s" % (taskType, sid))
                raise
            returnDict[sid][taskType] = [list(subTuple)
                                         for subTuple in zipped]

    return returnDict, idKeyList

Example #10

0

Show file

File: transpose_rpt.py Project: maureendss/LMEDS

def transposeRPT(path, txtPath, pageName, outputPath):
    '''
    Transposes RPT data

    Input files: one file per subject
    Output files: one file per stimuli

    Reads every csv file in path and every .txt transcript in txtPath and
    writes <outputPath>/<pageName>.csv, where each stimulus contributes
    one row per word (or per syllable when a demarcator is configured).

    NOTE: pageName is overwritten below with the page name found in the
    first response row; the value passed by the caller is not used.
    '''
    utils.makeDir(outputPath)

    # Load response data
    responseDataList = []
    fnList = utils.findFiles(path, filterExt=".csv")
    for fn in fnList:
        responseDataList.append(user_response.loadUserResponse(join(path,
                                                                    fn)))

    # Load the demarcator, if there is one
    # and load the order info if present
    demarcator = None
    pageName, pageArgs, _, _ = responseDataList[0][0]
    if pageName == "syllable_marking":

        # The demarcator can either be an arg or a keyword arg.
        # Either way, it should be the last item in the list
        demarcator = pageArgs[-1]
        if "syllableDemarcator" in demarcator:
            demarcator = demarcator.split("=")[1]

    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, _, orderListOfLists = parsedTuple
    orderList = []
    if len(orderListOfLists) > 0:
        orderList = [
            ",".join(row) for row in utils.safeZip(orderListOfLists, True)
        ]

    # Load Words
    txtDict = {}
    for fn in utils.findFiles(txtPath, filterExt=".txt"):
        name = os.path.splitext(fn)[0]
        txtList = loader.loadTxtFile(join(txtPath, fn))

        txtList = [tmpTxt.replace(" ", ",") for tmpTxt in txtList]

        # Remove HTML tags (lines containing "<" are dropped as a whole)
        txtList = [word for word in txtList if "<" not in word]

        txt = ",".join(txtList)

        if demarcator is None:
            txtDict[name] = [word for word in txt.split(",") if word != ""]
        else:
            txtDict[name] = [
                syllable for word in txt.split(",") if word != ""
                for syllable in word.split(demarcator)
            ]

    returnDict, idKeyList = _transposeRPT(responseDataList)

    doUserSeqHeader = len(orderListOfLists) > 0
    headerRow, anonHeaderRow = _buildHeader(fnList, pageName, doUserSeqHeader,
                                            idKeyList[0])

    # Format the output rpt scores
    aggrOutputList = [headerRow, anonHeaderRow]
    for i, stimulusID in enumerate(idKeyList):

        wordList = txtDict[stimulusID.split(",")[2]]
        stimulusIDList = [stimulusID for _ in wordList]
        aspectSumList = [
            stimulusIDList,
            wordList,
        ]
        aspectList = []

        # Give both aspects a defined, empty value for this stimulus so a
        # KeyError below can neither leave the names unbound (first
        # stimulus) nor leak the previous stimulus's scores (later ones).
        bScoreList = []
        bSumList = []
        pScoreList = []
        pSumList = []
        try:
            bScoreList, bSumList = _getScores(returnDict[stimulusID], B)
        except KeyError:
            pass
        try:
            pScoreList, pSumList = _getScores(returnDict[stimulusID], P)
        except KeyError:
            pass

        if pageName == "boundary":
            aspectSumList.append(bSumList)
            aspectList.append(bScoreList)
        elif pageName in ["prominence", "syllable_marking"]:
            aspectSumList.append(pSumList)
            aspectList.append(pScoreList)
        elif pageName == "boundary_and_prominence":
            aspectSumList.extend([
                bSumList,
                pSumList,
            ])
            aspectList.extend([
                bScoreList,
                pScoreList,
            ])

        # Extend header with sequence order information
        if doUserSeqHeader:
            orderStr = orderList[i]
            numAnnotators = range(max([len(bSumList), len(pSumList)]))
            tmpOrderList = [orderStr for _ in numAnnotators]
            aspectList.append(tmpOrderList)

        dataList = aspectSumList + aspectList
        combinedList = [
            _unifyRow(row)
            for row in utils.safeZip(dataList, enforceLength=True)
        ]
        aggrOutputList.extend([",".join(row) for row in combinedList])

    outputTxt = "\n".join(aggrOutputList)

    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

Example #11

0

Show file

File: transpose_rpt.py Project: maureendss/LMEDS

def _transposeRPT(dataListOfLists):
    '''
    Collects the boundary (B) and prominence (P) scores of every stimulus

    dataListOfLists holds one list of response rows per user; a row is
    (taskName, stimuliArgList, <ignored>, dataTxt).  A stimulus id is
    built from the row's 1-based position and its arguments, leaving out
    any 'orderSI=' / 'orderAI=' arguments.

    Returns (returnDict, idKeyList): idKeyList lists the stimulus ids of
    the first user, and returnDict[id][B] / returnDict[id][P] hold the
    scores of all users, transposed with utils.safeZip for those ids.

    Raises ValueError when a row has a task name other than
    'boundary', 'prominence', 'syllable_marking' or
    'boundary_and_prominence' (this used to fail with an unrelated
    TypeError raised by len(None)).
    '''

    orderPrefixes = ('orderSI=', 'orderAI=')

    idKeyList = []

    # Load the data
    returnDict = {}
    for j, userRows in enumerate(dataListOfLists):

        oom = utils.orderOfMagnitude(len(userRows)) + 1
        stimTemplate = "s_%%0%dd," % oom

        i = 0
        for taskName, stimuliArgList, _, dataTxt in userRows:
            i += 1

            # Remove the sequence order variables from the stimuli arg list
            cutStimuliArgList = [
                arg for arg in stimuliArgList
                if not any(prefix in arg for prefix in orderPrefixes)
            ]

            stimuliID = stimTemplate % i + ','.join(cutStimuliArgList)

            scores = dataTxt.split(",")

            if taskName == 'boundary_and_prominence':
                # Boundary scores come first, prominence scores second
                midpoint = len(scores) // 2
                bScores, pScores = scores[:midpoint], scores[midpoint:]
            elif taskName == "boundary":
                bScores, pScores = scores, []
            elif taskName in ["prominence", "syllable_marking"]:
                bScores, pScores = [], scores
            else:
                raise ValueError("Unsupported RPT task name: %s" % taskName)

            # The first user's rows define which ids are reported
            if j == 0:
                idKeyList.append(stimuliID)

            scoreDict = returnDict.setdefault(stimuliID, {})
            scoreDict.setdefault(B, []).append(bScores)
            scoreDict.setdefault(P, []).append(pScores)

    # Transpose the data
    # Every id in idKeyList was given B and P entries in the loop above
    for sid in idKeyList:
        for taskType in [B, P]:
            tmpList = returnDict[sid][taskType]
            try:
                zipped = utils.safeZip(tmpList, enforceLength=True)
            except Exception:
                print("Problem with score type: %s, SID: %s" % (taskType, sid))
                raise
            returnDict[sid][taskType] = [list(subTuple) for subTuple in zipped]

    return returnDict, idKeyList

Example #12

0

Show file

def transposeChoice(path, pageName, outputPath):
    '''
    Writes the choice responses of all users as one row per stimulus

    path - folder with one response csv file per user
    pageName - used for the output file names and passed to _buildHeader
    outputPath - destination folder (created if it does not exist)

    Output: <pageName>.csv (two header rows, then the stimuli with every
    user's answer) and, if not already present,
    <pageName>_answer_template.csv listing only the stimuli.
    '''

    utils.makeDir(outputPath)

    def _chosenIndex(responseTuple):
        # Position of the first '1' in the comma-separated data field
        return str(responseTuple[3].split(',').index('1'))

    # Load response data
    fnList = utils.findFiles(path, filterExt=".csv")
    responseDataList = []
    for fn in fnList:
        responseDataList.append(user_response.loadUserResponse(join(path,
                                                                    fn)))

    # Sort response if sequence order information is available
    parsedTuple = transpose_utils.parseResponse(responseDataList)
    responseDataList, stimuliListsOfLists, orderListOfLists = parsedTuple

    # Convert response to single answer
    responseDataList = [[_chosenIndex(responseTuple)
                         for responseTuple in userDataList]
                        for userDataList in responseDataList]

    # Verify that all responses have the same list of stimuli
    assert (all(
        [stimuliListsOfLists[0] == header for header in stimuliListsOfLists]))

    # Transpose data
    tResponseDataList = list(utils.safeZip(responseDataList, True))
    if len(orderListOfLists) > 0:
        tOrderListOfLists = list(utils.safeZip(orderListOfLists, True))
    else:
        tOrderListOfLists = []

    # Add a unique id to each row
    oom = utils.orderOfMagnitude(len(stimuliListsOfLists[0]))
    stimID = "s%%0%dd" % (oom + 1)
    stimuliList = [
        "%s,%s" % (stimID % i, row)
        for i, row in enumerate(stimuliListsOfLists[0])
    ]

    addSequenceInfo = len(tOrderListOfLists) > 0
    if addSequenceInfo:  # Add sequence information to each row
        extendedList = []
        for row, sequenceInfo in utils.safeZip(
                [tResponseDataList, tOrderListOfLists], True):
            extendedList.append(list(row) + list(sequenceInfo))
        tResponseDataList = extendedList

    # Aggregate the stimuli and the responses in rows
    tResponseDataList = [list(row) for row in tResponseDataList]
    bodyRows = [
        [header, ] + list(row) for header, row in utils.safeZip(
            [stimuliList, tResponseDataList], True)
    ]

    # Add the column heading rows
    # First row in unanonymized user names; Second row is anonymized
    numArgs = stimuliList[0].count(",")
    rowOne, rowTwo = _buildHeader(fnList, numArgs, pageName, addSequenceInfo)
    outputList = [rowOne, rowTwo, ] + bodyRows

    outputLines = [",".join(row) for row in outputList]
    outputTxt = u"\n".join(outputLines)
    outputFN = join(outputPath, pageName + ".csv")
    with io.open(outputFN, "w", encoding="utf-8") as fd:
        fd.write(outputTxt)

    # Output a template users can fill in to auto score the results
    name = pageName + "_answer_template.csv"
    answersFN = join(outputPath, name)
    if os.path.exists(answersFN):
        print("Response template '%s' already exists.  Not overwriting." %
              name)
    else:
        templateTxt = u"\n".join(stimuliList)
        with io.open(answersFN, "w", encoding="utf-8") as fd:
            fd.write(templateTxt)

Example #13

0

Show file

def transposeSurvey(path, surveyFullPathList, outputPath):
    '''
    Transposes survey responses: one output csv file per survey

    path - folder holding one response csv file per user
    surveyFullPathList - full paths of the survey definition files; the
                         file name without extension is the survey name
    outputPath - folder that receives <surveyName>.csv (created if needed)

    In every output file the first two columns are the question text and
    the widget text, the third is a "Total" column (the sum of the user
    values, or '-' when a value is not an integer), followed by one
    column per user file.

    Returns (returnDict, idKeyList, aspectKeyList): the transposed rows
    keyed by survey name, the list of survey names, and an always-empty
    list.
    '''
    utils.makeDir(outputPath)

    fnList = utils.findFiles(path, filterExt=".csv")
    surveyDataList = [user_response.loadUserResponse(join(path, fn))
                      for fn in fnList]

    aspectKeyList = []

    # Build the two leading rows (question titles, widget texts) per survey
    defaultDict = {}
    for surveyFN in surveyFullPathList:
        fn = os.path.split(surveyFN)[1]
        surveyName = os.path.splitext(fn)[0]

        questionTitleDataList = []
        surveyQuestionDataList = []
        surveyItemList = survey.parseSurveyFile(surveyFN)
        for surveyItem in surveyItemList:
            for widgetType, widgetTextList in surveyItem.widgetList:
                if widgetType == "None":
                    continue
                if widgetType in ["Multiline_Textbox", "Sliding_Scale"]:
                    widgetTextList = ["", ]
                # The question text goes in the first column of a widget;
                # the widget's remaining columns get a blank title
                blankTxt = ["", ] * (len(widgetTextList) - 1)
                # Removing commas b/c we're using csv files
                surveyQuestion = surveyItem.text.replace(",", "")
                questionTitleDataList.extend([surveyQuestion, ] + blankTxt)
                if len(widgetTextList) == 0:
                    surveyQuestionDataList.extend(["", ])
                else:
                    surveyQuestionDataList.extend(widgetTextList)

        surveyRows = defaultDict.setdefault(surveyName, [])
        surveyRows.append(questionTitleDataList)
        surveyRows.append(surveyQuestionDataList)

    # Load the data: one row of answers per user is added to each survey
    returnDict = {}
    for fn, userDataList in utils.safeZip([fnList, surveyDataList], True):

        for dataTuple in userDataList:
            # taskName, stimuliArgList, argTxt, dataTxt = dataTuple
            stimuliArgList = dataTuple[1]
            stimuliID = stimuliArgList[0]
            dataTxt = dataTuple[3]

            # The survey's row list is shared with defaultDict, so the
            # user rows are appended after the two leading rows
            surveyRows = returnDict.setdefault(stimuliID,
                                               defaultDict[stimuliID])
            surveyRows.append(dataTxt.split(","))

    # Materialize the keys: dict.keys() is a live, non-indexable view on
    # Python 3, whereas callers on Python 2 received a plain list.
    idKeyList = list(returnDict.keys())

    # Transpose the data
    for stimulusID in idKeyList:
        transposedRows = [list(subTuple) for subTuple in
                          utils.safeZip(returnDict[stimulusID],
                                        enforceLength=True)]

        # Add a summation column
        newData = []
        for row in transposedRows:

            try:
                total = str(sum([int(val) if val != '' else 0
                                 for val in row[2:]]))
            except ValueError:
                # Free-text answers cannot be summed
                total = '-'
            newData.append(row[:2] + [total, ] + row[2:])
        returnDict[stimulusID] = newData

        mainSurveyData = [",".join(subList) for subList in newData]

        outputTxtList = [",".join(["", "", "Total", ] + fnList), ]
        outputTxtList += mainSurveyData

        outputTxt = "\n".join(outputTxtList)
        outputFN = join(outputPath, stimulusID + ".csv")
        with io.open(outputFN, "w", encoding="utf-8") as fd:
            fd.write(outputTxt)

    return returnDict, idKeyList, aspectKeyList