コード例 #1
0
    def __init__(self):
        """Create the helper objects and group the feedback column names.

        Every column name is read from the ``Options`` instance; each group
        built here is a plain list of those names.
        """
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        opt = self.opt
        self.infringementsColumns = [
            opt.profanityCol,
            opt.nonConstructiveCol,
            opt.scoreCol,
            opt.copyCol,
            opt.incompleteReadingCol,
            opt.notRelatedCol,
            opt.otherCol,
        ]
        self.positiveColumns = [opt.fairCol, opt.topQualityCol]
        # lenient/strict were previously listed among the neutral columns;
        # they now live in their own "indicator" group.
        self.neutralColumns = [opt.abstainCol]
        self.indicatorColumns = [opt.lenientCol, opt.strictCol]
        self.yellowColumns = [
            opt.nonConstructiveCol,
            opt.scoreCol,
            opt.copyCol,
            opt.incompleteReadingCol,
            opt.notRelatedCol,
            opt.otherCol,
        ]
        self.redColumns = [opt.profanityCol]
        # feedback = infringements followed by positives; allColumns appends
        # the neutral and indicator groups after them.
        self.feedbackColumns = [
            *self.infringementsColumns, *self.positiveColumns
        ]
        self.allColumns = [
            *self.feedbackColumns, *self.neutralColumns,
            *self.indicatorColumns
        ]
コード例 #2
0
class CreateProposerDocument():
    """Build the proposers' spreadsheet as a copy of the original export."""

    def __init__(self):
        """Create the options, utilities and gspread wrapper objects."""
        self.options = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Copy the export, append the report columns and mark blank notes.

        The copy is shared with ``options.accountEmail`` as a writer. The
        report headings are written in row 1 of the newly added columns and
        an 'x' is written in the first added column for every assessment
        whose text is empty after stripping whitespace.
        """
        opts = self.options
        wrapper = self.gspreadWrapper

        print('Loading original...')
        wrapper.loadAssessmentsFile()
        print('Make a new copy...')
        spreadsheet = wrapper.gc.copy(
            opts.originalExportFromIdeascale,
            title=opts.proposerDocumentName
        )
        spreadsheet.share(opts.accountEmail, perm_type='user', role='writer')

        worksheet = spreadsheet.worksheet(opts.assessmentsSheet)

        print('Setting headings for report...')
        # The report columns are appended right after the existing ones.
        firstNewCol = worksheet.col_count + 1
        headings = [
            opts.blankColumn,
            opts.topQualityColumn,
            opts.profanityColumn,
            opts.scoreColumn,
            opts.copyColumn,
            opts.wrongChallengeColumn,
            opts.wrongCriteriaColumn,
            opts.otherColumn,
            opts.otherRationaleColumn,
        ]
        worksheet.add_cols(len(headings))

        print('Set column width...')
        set_column_widths(worksheet, [('J:Q', 40), ('R', 200)])

        cellsToAdd = [
            Cell(row=1, col=firstNewCol + offset, value=heading)
            for offset, heading in enumerate(headings)
        ]

        print('Mark blank assessments...')
        # The assessment id is used directly as the sheet row of the mark.
        for note in wrapper.getAssessmentsData():
            if note[opts.assessmentColumn].strip() != '':
                continue
            cellsToAdd.append(
                Cell(row=note[opts.assessmentsIdColumn],
                     col=firstNewCol,
                     value='x')
            )
        worksheet.update_cells(cellsToAdd, value_input_option='USER_ENTERED')
        print('Document for proposers created')
        print('Link: {}'.format(spreadsheet.url))
コード例 #3
0
    def __init__(self):
        """Create the helpers, load the vCA master data and set up scoring.

        Starts with empty result containers (``assessors``, ``similarities``)
        and a minimum similarity score of 0.5, then calls
        ``initSimilarity``.
        """
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        print('Load vca data...')
        self.gspreadWrapper.getVCAMasterData()

        # Result containers; presumably filled by the similarity search
        # defined elsewhere in the class.
        self.assessors = {}
        self.similarities = []

        self.similarityMinScore = 0.5

        self.initSimilarity()
コード例 #4
0
    def __init__(self):
        """Create the helper objects and group the feedback column names.

        Column names come from the ``Options`` instance; ``feedbackColumns``
        is the infringement columns followed by the positive ones.
        """
        self.options = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        options = self.options
        self.infringementsColumns = [
            options.profanityColumn,
            options.scoreColumn,
            options.copyColumn,
            options.wrongChallengeColumn,
            options.wrongCriteriaColumn,
            options.otherColumn,
        ]
        self.positiveColumns = [
            options.fairColumn,
            options.topQualityColumn,
        ]
        self.feedbackColumns = [
            *self.infringementsColumns, *self.positiveColumns
        ]
コード例 #5
0
class CreateProposerDocument():
    """Build the proposers' spreadsheet from the exported assessments."""

    def __init__(self):
        """Create the options, utilities and gspread wrapper objects."""
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Create the proposer document with a formatted "Assessments" sheet.

        The data frame returned by ``prepareDataFromExport`` gets a blank
        marker column ('x' when the assessment text is empty after
        stripping) and is then written through ``createSheetFromDf`` with
        the headings, column widths and cell formats defined below.
        """
        opt = self.opt
        wrapper = self.gspreadWrapper

        print('Loading original...')
        wrapper.loadAssessmentsFile()
        proposerDf = wrapper.prepareDataFromExport()
        spreadsheet = wrapper.createDoc(opt.proposerDocumentName)

        # Columns handed to the sheet writer as headings, in this order.
        headings = [
            opt.proposalKeyCol,
            opt.ideaURLCol,
            opt.questionCol,
            opt.assessorCol,
            opt.assessmentCol,
            opt.ratingCol,
            opt.assessmentsIdCol,
            opt.tripletIdCol,
            opt.proposalIdCol,
            opt.blankCol,
            opt.topQualityCol,
            opt.profanityCol,
            opt.nonConstructiveCol,
            opt.scoreCol,
            opt.copyCol,
            opt.incompleteReadingCol,
            opt.notRelatedCol,
            opt.otherCol,
            opt.otherRationaleCol,
        ]

        print('Assign blanks...')

        def blankMark(text):
            # 'x' flags an assessment whose text is only whitespace.
            if text.strip() == "":
                return 'x'
            return ''

        proposerDf[opt.blankCol] = proposerDf[opt.assessmentCol].apply(
            blankMark)

        print('Format columns...')
        widths = [('A:D', 150), ('E', 400), ('F', 60), ('G:R', 30), ('S', 400)]

        styles = self.utils
        formats = [
            ('F:R', styles.counterFormat),
            ('A1:S1', styles.headingFormat),
            ('F1:R1', styles.verticalHeadingFormat),
            ('K2:K', styles.greenFormat),
            ('L2:L', styles.redFormat),
            ('M2:R', styles.yellowFormat),
            ('A2:E', styles.textFormat),
        ]

        wrapper.createSheetFromDf(spreadsheet, "Assessments", proposerDf,
                                  headings, widths, formats)
        print('Document for proposers created')
        print('Link: {}'.format(spreadsheet.url))
コード例 #6
0
class CreateSimilarity():
    def __init__(self):
        """Create the helpers, load the vCA master data and set up scoring.

        Starts with empty result containers and a minimum similarity score
        of 0.5, then installs the vectorize/similarity callables through
        ``initSimilarity``.
        """
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        print('Load vca data...')
        self.gspreadWrapper.getVCAMasterData()

        # Filled by findSimilarity: per-assessor summary keyed by assessor,
        # and one record per similar pair of assessments.
        self.assessors = {}
        self.similarities = []

        # Pairs are reported only when their score is above this value.
        self.similarityMinScore = 0.5

        self.initSimilarity()

    def createDoc(self):
        """Run the similarity search and publish it as a spreadsheet.

        Writes two CSV files under ``cache/``, creates the
        'Similarity Analysis' document with an 'Assessments' and a 'CAs'
        sheet, then deletes the worksheet at index 0. After this call
        ``self.assessors`` is a list of records instead of a dict.
        """
        self.findSimilarity()

        # Turn each list of similar assessors into a comma-separated string
        # without repeats, keeping first-seen order.
        for record in self.assessors.values():
            uniqueOthers = dict.fromkeys(record['similarity_other_assessors'])
            record['similarity_other_assessors'] = ','.join(uniqueOthers)

        self.assessors = list(self.assessors.values())

        dfSimilarities = pd.DataFrame(self.similarities)
        dfAssessors = pd.DataFrame(self.assessors)

        dfSimilarities.to_csv('cache/sim5.csv')
        dfAssessors.to_csv('cache/sim_ass5.csv')

        wrapper = self.gspreadWrapper
        styles = self.utils
        spreadsheet = wrapper.createDoc('Similarity Analysis')

        pairHeadings = [
            'id A', 'id B', 'Assessor A', 'Assessor B', 'Note A', 'Note B',
            'Similarity Score'
        ]
        wrapper.createSheetFromDf(
            spreadsheet,
            'Assessments',
            dfSimilarities,
            pairHeadings,
            columnWidths=[('A:B', 50), ('C:D', 150), ('E:F', 300), ('G', 60)],
            formats=[('G', styles.counterFormat),
                     ('A1:G1', styles.headingFormat)])

        assessorHeadings = [
            'Assessor', 'similarity_other_assessors',
            'similarity_count_others', 'similarity_count_self'
        ]
        wrapper.createSheetFromDf(
            spreadsheet,
            'CAs',
            dfAssessors,
            assessorHeadings,
            columnWidths=[('A:B', 150), ('C:D', 60)],
            formats=[
                ('C:D', styles.counterFormat),
                ('A1:D1', styles.headingFormat),
                ('C1:D1', styles.verticalHeadingFormat),
            ])

        # Remove the worksheet at index 0 (presumably the default sheet
        # created with the document; confirm against createDoc).
        spreadsheet.del_worksheet(spreadsheet.get_worksheet(0))

        print('Link: {}'.format(spreadsheet.url))

    def initSimilarity(self):
        """Install the ``vectorize`` and ``similarity`` callables on self."""

        def vectorize(Text):
            # Fit a fresh TfidfVectorizer on the texts; return a dense array.
            return TfidfVectorizer().fit_transform(Text).toarray()

        def similarity(doc1, doc2):
            # Cosine-similarity matrix of the two vectors.
            return cosine_similarity([doc1, doc2])

        self.vectorize = vectorize
        self.similarity = similarity

    def findSimilarity(self):
        data = self.gspreadWrapper.dfVca
        notes = list(data[self.opt.assessmentCol])
        ids = list(data[self.opt.assessmentsIdCol])
        vectors = self.vectorize(notes)
        s_vectors = list(zip(ids, vectors))
        plagiarism_results = set()
        progress = 0
        for assessor_a, text_vector_a in s_vectors:
            print("{} of {}".format(progress, len(s_vectors)))
            new_vectors = s_vectors.copy()
            current_index = new_vectors.index((assessor_a, text_vector_a))
            del new_vectors[current_index]
            for assessor_b, text_vector_b in new_vectors:
                sim_score = self.similarity(text_vector_a, text_vector_b)[0][1]
                assessor_pair = sorted((assessor_a, assessor_b))
                score = (assessor_pair[0], assessor_pair[1], sim_score)
                plagiarism_results.add(score)
            progress = progress + 1
        for res in plagiarism_results:
            if (res[2] > self.similarityMinScore):
                ass_0 = data.loc[data[self.opt.assessmentsIdCol] == res[0]][
                    self.opt.assessorCol].item()
                ass_1 = data.loc[data[self.opt.assessmentsIdCol] == res[1]][
                    self.opt.assessorCol].item()
                assessment_0 = data.loc[data[self.opt.assessmentsIdCol] ==
                                        res[0]][self.opt.assessmentCol].item()
                assessment_1 = data.loc[data[self.opt.assessmentsIdCol] ==
                                        res[1]][self.opt.assessmentCol].item()
                if (ass_0 not in self.assessors):
                    self.assessors[ass_0] = {
                        'Assessor': ass_0,
                        'similarity_other_assessors': [],
                        'similarity_count_others': 0,
                        'similarity_count_self': 0
                    }
                if (ass_0 != ass_1):
                    self.assessors[ass_0]['similarity_other_assessors'].append(
                        ass_1)
                    self.assessors[ass_0][
                        'similarity_count_others'] = self.assessors[ass_0][
                            'similarity_count_others'] + 1
                else:
                    self.assessors[ass_0][
                        'similarity_count_self'] = self.assessors[ass_0][
                            'similarity_count_self'] + 1
                self.similarities.append({
                    'id A': res[0],
                    'id B': res[1],
                    'Assessor A': ass_0,
                    'Assessor B': ass_1,
                    'Note A': assessment_0,
                    'Note B': assessment_1,
                    'Similarity Score': res[2]
                })
コード例 #7
0
 def __init__(self):
     """Create the options and gspread wrapper objects used by the class."""
     self.options = Options()
     self.gspreadWrapper = GspreadWrapper()
コード例 #8
0
class createVCAAggregate():
    def __init__(self):
        """Create the helper objects and group the feedback column names.

        Every column name is read from the ``Options`` instance. The yellow
        and red groups are the columns ``calculateCards`` checks.
        """
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        opt = self.opt
        self.infringementsColumns = [
            opt.profanityCol,
            opt.nonConstructiveCol,
            opt.scoreCol,
            opt.copyCol,
            opt.incompleteReadingCol,
            opt.notRelatedCol,
            opt.otherCol,
        ]
        self.positiveColumns = [opt.fairCol, opt.topQualityCol]
        # lenient/strict were previously listed among the neutral columns;
        # they now live in their own "indicator" group.
        self.neutralColumns = [opt.abstainCol]
        self.indicatorColumns = [opt.lenientCol, opt.strictCol]
        self.yellowColumns = [
            opt.nonConstructiveCol,
            opt.scoreCol,
            opt.copyCol,
            opt.incompleteReadingCol,
            opt.notRelatedCol,
            opt.otherCol,
        ]
        self.redColumns = [opt.profanityCol]
        # feedback = infringements followed by positives; allColumns appends
        # the neutral and indicator groups after them.
        self.feedbackColumns = [
            *self.infringementsColumns, *self.positiveColumns
        ]
        self.allColumns = [
            *self.feedbackColumns, *self.neutralColumns,
            *self.indicatorColumns
        ]

    def prepareBaseData(self):
        self.gspreadWrapper.getVCAMasterData()
        self.dfVca = self.gspreadWrapper.dfVca.set_index('id')
        # Set all counters to 0
        self.dfVca[self.opt.noVCAReviewsCol] = 0
        self.dfVca[self.opt.yellowCardCol] = 0
        self.dfVca[self.opt.redCardCol] = 0
        for col in self.allColumns:
            self.dfVca[col] = 0

        self.gspreadWrapper.getVCAMasterAssessors()
        self.gspreadWrapper.dfVcaAssessors[self.opt.yellowCardCol] = 0
        self.gspreadWrapper.dfVcaAssessors[self.opt.redCardCol] = 0

        self.gspreadWrapper.getProposersData()

    def loadVCAsFiles(self):
        self.prepareBaseData()
        self.vcaData = []
        self.vcaDocs = []
        for vcaFile in self.opt.VCAsFiles:
            print(vcaFile)
            vcaDocument = self.gspreadWrapper.gc.open_by_key(vcaFile)
            try:
                vcaSheet = vcaDocument.worksheet("Assessments")
            except:
                vcaSheet = vcaDocument.get_worksheet(0)
            data = pd.DataFrame(vcaSheet.get_all_records())
            data.set_index('id', inplace=True)
            self.vcaData.append(data)
            self.vcaDocs.append(vcaDocument)
            #sleep(35)

    def createDoc(self):
        """Aggregate all vCA files over the master list and publish the result.

        Steps, in order:

        1. load the master data and every vCA file (``loadVCAsFiles``);
        2. for each master assessment, add up the columns marked in each vCA
           file and derive its yellow/red cards (``calculateCards``);
        3. keep as valid the assessments with no yellow card whose assessor
           has no red-carded assessment, and average their ratings per
           proposal;
        4. write three CSV files under ``cache/`` and create the aggregate
           spreadsheet with one sheet per recap.
        """
        self.loadVCAsFiles()
        # Loop over master ids as reference
        for assessmentId, row in self.dfVca.iterrows():
            # Loop over all vca files
            for vcaDf in self.vcaData:
                if (assessmentId not in vcaDf.index):
                    continue
                locAss = vcaDf.loc[assessmentId]
                integrity = self.checkIntegrity(assessmentId, row, locAss)
                if (integrity is False):
                    print('Error')
                    # Stops reading the remaining vCA files for this
                    # assessment only; the outer loop carries on.
                    break

                good = self.goodFeedback(locAss)
                bad = self.badFeedback(locAss)
                neutral = self.neutralFeedback(locAss)
                if (self.isVCAfeedbackValid(locAss, good, bad, neutral)):
                    # Neutral-only reviews do not count as a vCA review.
                    if (good or bad):
                        self.dfVca.loc[assessmentId,
                                       self.opt.noVCAReviewsCol] += 1
                    for col in self.allColumns:
                        colVal = self.checkIfMarked(locAss, col)
                        if (colVal > 0):
                            self.dfVca.loc[assessmentId, col] += colVal

            (yellow, red) = self.calculateCards(self.dfVca.loc[assessmentId])
            self.dfVca.loc[assessmentId, self.opt.yellowCardCol] = yellow
            self.dfVca.loc[assessmentId, self.opt.redCardCol] = red

        # Extract red card assessors and update List
        redCards = self.dfVca[self.dfVca[self.opt.redCardCol] > 0]
        self.redCardsAssessors = list(redCards[self.opt.assessorCol].unique())

        # Select valid assessments: no yellow card on the assessment and no
        # red card on any assessment by the same assessor. The explicit
        # copy keeps the column assignments below off a filtered view of
        # self.dfVca (pandas SettingWithCopy).
        validAssessments = self.dfVca[(
            (self.dfVca[self.opt.yellowCardCol] == 0)
            & ~self.dfVca[self.opt.assessorCol].isin(self.redCardsAssessors))
                                      ].copy()
        validAssessments[self.opt.assessmentsIdCol] = validAssessments.index

        # Add Proposal title (getting it from Proposer doc)
        validAssessments[self.opt.proposalKeyCol] = validAssessments.apply(
            lambda x: str(self.gspreadWrapper.dfProposers.loc[
                self.gspreadWrapper.dfProposers[self.opt.assessmentsIdCol] ==
                x[self.opt.assessmentsIdCol], self.opt.proposalKeyCol].iloc[0]
                          ),
            axis=1)

        # create group for final scores
        finalProposals = validAssessments.groupby(
            self.opt.proposalKeyCol,
            as_index=False)[self.opt.ratingCol].mean()

        # generate Assessor Recap
        assessors = self.assessorRecap()
        # generate nonValidAssessment recap
        nonValidAssessments = self.nonValidAssessments(validAssessments)

        # Generate Doc
        validAssessments.to_csv('cache/valid.csv')
        nonValidAssessments.to_csv('cache/non-valid.csv')
        assessors.to_csv('cache/assessors.csv')
        spreadsheet = self.gspreadWrapper.createDoc(
            self.opt.VCAAggregateFileName)

        # Print valid assessments
        assessmentsHeadings = [
            self.opt.assessmentsIdCol, self.opt.tripletIdCol,
            self.opt.proposalKeyCol, self.opt.ideaURLCol,
            self.opt.proposalIdCol, self.opt.questionCol,
            self.opt.questionIdCol, self.opt.ratingCol, self.opt.assessorCol,
            self.opt.assessmentCol, self.opt.proposerMarkCol, self.opt.fairCol,
            self.opt.topQualityCol, self.opt.abstainCol, self.opt.strictCol,
            self.opt.lenientCol, self.opt.profanityCol,
            self.opt.nonConstructiveCol, self.opt.scoreCol, self.opt.copyCol,
            self.opt.incompleteReadingCol, self.opt.notRelatedCol,
            self.opt.otherCol, self.opt.noVCAReviewsCol,
            self.opt.yellowCardCol, self.opt.redCardCol
        ]
        assessmentsWidths = [('A', 40), ('B', 60), ('C', 120), ('D', 120),
                             ('E', 40), ('F', 200), ('G', 40), ('H', 60),
                             ('I', 120), ('J', 400), ('K:Z', 30)]
        assessmentsFormats = [
            ('H:H', self.utils.counterFormat),
            ('J:J', self.utils.noteFormat),
            ('K:Z', self.utils.counterFormat),
            ('A1:X1', self.utils.headingFormat),
            ('B1', self.utils.verticalHeadingFormat),
            ('E1', self.utils.verticalHeadingFormat),
            ('G1:H1', self.utils.verticalHeadingFormat),
            ('K1:Z1', self.utils.verticalHeadingFormat),
            ('L2:L', self.utils.greenFormat),
            ('Q2:Q', self.utils.redFormat),
            ('R2:W', self.utils.yellowFormat),
            ('Y2:Y', self.utils.yellowFormat),
            ('Z2:Z', self.utils.redFormat),
        ]

        self.gspreadWrapper.createSheetFromDf(spreadsheet,
                                              'Valid Assessments',
                                              validAssessments,
                                              assessmentsHeadings,
                                              columnWidths=assessmentsWidths,
                                              formats=assessmentsFormats)

        # Print assessors recap

        # Write sheet with CAs summary
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Community Advisors',
            assessors, [
                'assessor', 'total', 'blanks', 'blankPercentage', 'excluded',
                'Yellow Card', 'Red Card', 'Constructive Feedback', 'note'
            ],
            columnWidths=[('A', 140), ('B:D', 60), ('E', 100), ('F:H', 40),
                          ('I', 200)],
            formats=[
                ('B:C', self.utils.counterFormat),
                ('F:H', self.utils.counterFormat),
                ('D2:D', self.utils.percentageFormat),
                ('A1:E1', self.utils.headingFormat),
                ('B1:D1', self.utils.verticalHeadingFormat),
                ('F1:H1', self.utils.verticalHeadingFormat),
                ('F2:F', self.utils.yellowFormat),
                ('G2:G', self.utils.redFormat),
                ('H2:H', self.utils.greenFormat),
            ])

        # Print non valid assessments -> reason, extract from proposer Doc

        nonAssessmentsHeadings = [
            self.opt.assessmentsIdCol, self.opt.tripletIdCol,
            self.opt.ideaURLCol, self.opt.proposalIdCol, self.opt.questionCol,
            self.opt.ratingCol, self.opt.assessorCol, self.opt.assessmentCol,
            self.opt.blankCol, self.opt.proposerMarkCol, self.opt.fairCol,
            self.opt.topQualityCol, self.opt.abstainCol, self.opt.strictCol,
            self.opt.lenientCol, self.opt.profanityCol,
            self.opt.nonConstructiveCol, self.opt.scoreCol, self.opt.copyCol,
            self.opt.incompleteReadingCol, self.opt.notRelatedCol,
            self.opt.otherCol, self.opt.noVCAReviewsCol,
            self.opt.yellowCardCol, self.opt.redCardCol, 'reason'
        ]
        nonAssessmentsWidths = [('A', 40), ('B', 60), ('C', 120), ('D', 40),
                                ('E', 200), ('F', 40), ('G', 120), ('H', 400),
                                ('I:Y', 30), ('Z', 200)]
        nonAssessmentsFormats = [
            ('G:G', self.utils.counterFormat),
            ('H:H', self.utils.noteFormat),
            ('I:Y', self.utils.counterFormat),
            ('A1:W1', self.utils.headingFormat),
            ('B1', self.utils.verticalHeadingFormat),
            ('D1', self.utils.verticalHeadingFormat),
            ('F1:G1', self.utils.verticalHeadingFormat),
            ('I1:Y1', self.utils.verticalHeadingFormat),
            ('K2:L', self.utils.greenFormat),
            ('P2:P', self.utils.redFormat),
            ('Q2:V', self.utils.yellowFormat),
            ('X2:X', self.utils.yellowFormat),
            ('Y2:Y', self.utils.redFormat),
        ]

        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Excluded Assessments',
            nonValidAssessments,
            nonAssessmentsHeadings,
            columnWidths=nonAssessmentsWidths,
            formats=nonAssessmentsFormats)

        # Print vca recap: one row (title, link) per loaded vCA document
        allVcas = [{'title': vcaDoc.title, 'link': vcaDoc.url}
                   for vcaDoc in self.vcaDocs]

        allVcasDf = pd.DataFrame(allVcas)

        self.gspreadWrapper.createSheetFromDf(spreadsheet,
                                              'Veteran Community Advisors',
                                              allVcasDf)

        proposalsWidths = [('A', 300), ('B', 60)]
        proposalsFormats = [('B:B', self.utils.counterFormat),
                            ('A:A', self.utils.noteFormat),
                            ('A1:B1', self.utils.headingFormat),
                            ('B1', self.utils.verticalHeadingFormat)]

        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Proposals',
            finalProposals, [self.opt.proposalKeyCol, self.opt.ratingCol],
            columnWidths=proposalsWidths,
            formats=proposalsFormats)

        print('Aggregated Document created')
        print('Link: {}'.format(spreadsheet.url))

    def nonValidAssessments(self, validAssessments):
        """Return the proposers' rows that are not among the valid ones.

        The proposers data is reloaded and indexed by 'id'. The feedback,
        proposer-mark and other-rationale columns are blanked. For excluded
        rows whose id exists in ``self.dfVca`` the aggregated counters and
        cards are copied over as ints. A 'reason' column is filled by
        ``describeReason`` and remaining NaN values become ''.
        """
        opt = self.opt
        self.gspreadWrapper.getProposersData()
        dfProposers = self.gspreadWrapper.dfProposers.set_index('id')
        dfProposers[opt.assessmentsIdCol] = dfProposers.index
        blankedColumns = [
            *self.allColumns, opt.proposerMarkCol, opt.otherRationaleCol
        ]
        for blanked in blankedColumns:
            dfProposers[blanked] = ''

        isValid = dfProposers[opt.assessmentsIdCol].isin(
            validAssessments.index)
        nonValidAssessments = dfProposers[~isValid].copy()

        copiedColumns = [
            *self.allColumns, opt.yellowCardCol, opt.redCardCol,
            opt.noVCAReviewsCol
        ]
        for assessmentId in nonValidAssessments.index:
            if (assessmentId not in self.dfVca.index):
                continue
            for col in copiedColumns:
                nonValidAssessments.loc[assessmentId, col] = int(
                    self.dfVca.loc[assessmentId, col])

        nonValidAssessments['reason'] = nonValidAssessments.apply(
            self.describeReason, axis=1)
        nonValidAssessments.fillna('', inplace=True)
        return nonValidAssessments

    def describeReason(self, row):
        reason = []
        if (row[self.opt.blankCol] == 'x'):
            reason.append(self.opt.blankCol)
        if (row['id'] in self.dfVca.index):
            tot = row[self.opt.noVCAReviewsCol]
            for col in self.infringementsColumns:
                if (tot > 0):
                    if ((row[col] / tot) >= self.opt.cardLimit):
                        reason.append(col)
        excludedAssessors = list(self.gspreadWrapper.dfVcaAssessors[
            self.gspreadWrapper.dfVcaAssessors['excluded'] == 'TRUE']
                                 ['assessor'])
        if (row[self.opt.assessorCol] in excludedAssessors):
            reason.append('Assessor excluded')
        return ', '.join(reason)

    def assessorRecap(self):
        self.gspreadWrapper.dfVcaAssessors.loc[
            self.gspreadWrapper.dfVcaAssessors['assessor'].
            isin(self.redCardsAssessors), 'excluded'] = 'TRUE'
        self.gspreadWrapper.dfVcaAssessors.loc[
            self.gspreadWrapper.dfVcaAssessors['assessor'].
            isin(self.redCardsAssessors), 'note'] = "red card"

        # Extract assessors
        locAssessors = self.dfVca.groupby(self.opt.assessorCol).agg(
            constructiveFeedback=(self.opt.topQualityCol, 'sum'),
            red=(self.opt.redCardCol, 'sum'),
            yellow=(self.opt.yellowCardCol, 'sum'),
        )
        for id, row in locAssessors.iterrows():
            self.gspreadWrapper.dfVcaAssessors.loc[
                self.gspreadWrapper.dfVcaAssessors['assessor'] == id,
                self.opt.redCardCol] = row['red']
            self.gspreadWrapper.dfVcaAssessors.loc[
                self.gspreadWrapper.dfVcaAssessors['assessor'] == id,
                self.opt.yellowCardCol] = row['yellow']
            self.gspreadWrapper.dfVcaAssessors.loc[
                self.gspreadWrapper.dfVcaAssessors['assessor'] == id,
                self.opt.topQualityCol] = row['constructiveFeedback']

        self.gspreadWrapper.dfVcaAssessors.fillna('', inplace=True)
        return self.gspreadWrapper.dfVcaAssessors

    def checkIntegrity(self, id, ass1, ass2):
        if ((ass1[self.opt.proposalIdCol] != ass2[self.opt.proposalIdCol]) or
            (ass1[self.opt.questionIdCol] != ass2[self.opt.questionIdCol])
                or (ass1[self.opt.ratingCol] != ass2[self.opt.ratingCol])
                or (ass1[self.opt.assessorCol] != ass2[self.opt.assessorCol])):
            print("Something wrong with assessment {}".format(id))
            return False
        return True

    def checkIfMarked(self, row, column):
        if (row[column].strip() != ''):
            return 1
        return 0

    def calculateCards(self, row):
        yellow = 0
        red = 0
        tot = row[self.opt.noVCAReviewsCol]
        if (tot >= self.opt.minimumVCA):
            if ((row[self.opt.profanityCol] / tot) >= self.opt.cardLimit):
                red = red + 1
            for col in self.yellowColumns:
                if ((row[col] / tot) >= self.opt.cardLimit):
                    yellow = yellow + 1
        return (yellow, red)

    def goodFeedback(self, row):
        for col in self.positiveColumns:
            if (self.checkIfMarked(row, col) > 0):
                return True
        return False

    def badFeedback(self, row):
        for col in self.infringementsColumns:
            if (self.checkIfMarked(row, col) > 0):
                return True
        return False

    def badValid(self, row):
        if ((self.checkIfMarked(row, self.opt.otherCol) == 1) and
            (self.checkIfMarked(row, self.opt.otherRationaleCol) == 0)):
            return False
        return True

    def neutralFeedback(self, row):
        for col in self.neutralColumns:
            if (self.checkIfMarked(row, col) > 0):
                return True
        return False

    def isVCAfeedbackValid(self, row, good, bad, neutral):
        if (bad):
            if (self.badValid(row) is False):
                return False
        if (sum([good, bad, neutral]) <= 1):
            return True
        return False
コード例 #9
0
 def __init__(self):
     """Create the options, utilities and gspread wrapper objects."""
     self.opt = Options()
     self.utils = Utils()
     self.gspreadWrapper = GspreadWrapper()
コード例 #10
0
class CreateVCAMaster():
    """Create the vCA master spreadsheet from the proposers' reviews."""

    def __init__(self):
        """Create the options, utilities and gspread wrapper objects."""
        self.options = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Create, format and fill the master document.

        A new spreadsheet is created and shared with
        ``options.accountEmail``. Its first worksheet becomes "Assessments"
        with the headings below. Assessors are split on
        ``options.allowedBlankPerAssessor`` and listed in two sheets of the
        proposers' document. Every assessment by a non-excluded assessor is
        then copied to the master sheet, with an 'x' in column 7 when the
        proposer flagged any infringement column.
        """
        opts = self.options
        wrapper = self.gspreadWrapper

        print('Create new document...')
        spreadsheet = wrapper.gc.create(opts.VCAMasterFileName)
        spreadsheet.share(opts.accountEmail, perm_type='user', role='writer')

        print('Create sheet...')
        worksheet = spreadsheet.get_worksheet(0)
        worksheet.update_title("Assessments")

        # Set headings
        print('Set headings...')
        headings = [
            opts.assessmentsIdColumn,
            opts.ideaURLColumn,
            opts.questionColumn,
            opts.ratingColumn,
            opts.assessorColumn,
            opts.assessmentColumn,
            opts.proposerMarkColumn,
            opts.fairColumn,
            opts.topQualityColumn,
            opts.profanityColumn,
            opts.scoreColumn,
            opts.copyColumn,
            opts.wrongChallengeColumn,
            opts.wrongCriteriaColumn,
            opts.otherColumn,
            opts.otherRationaleColumn,
        ]
        cellsToAdd = [
            Cell(row=1, col=position, value=heading)
            for position, heading in enumerate(headings, start=1)
        ]

        print('Set column width...')
        set_column_widths(worksheet, [
            ('A', 40), ('B:C', 200), ('D', 40), ('E', 120), ('F', 400),
            ('G:O', 30), ('P', 300)
        ])

        print('Format columns')
        noteFormat = cellFormat(wrapStrategy='CLIP')
        flagFormat = cellFormat(textFormat=textFormat(bold=True),
                                horizontalAlignment='CENTER')
        format_cell_ranges(worksheet, [
            ('D:D', flagFormat), ('F:F', noteFormat), ('G:O', flagFormat)
        ])

        print('Load proposers flagged reviews...')
        assessments = wrapper.getProposersData()

        # extract Assessors
        assessors = wrapper.groupByAssessor(assessments)

        # Split assessors on the allowed share of blank reviews.
        blankLimit = opts.allowedBlankPerAssessor
        excludedAssessors = []
        includedAssessors = []
        for name in assessors:
            blankShare = assessors[name]['blankPercentage']
            if (blankShare >= blankLimit):
                excludedAssessors.append(name)
            if (blankShare < blankLimit):
                includedAssessors.append(name)

        # Add sheets for excluded/included assessors to the proposers' doc
        proposersDoc = wrapper.gc.open_by_key(opts.proposersFile)
        wrapper.createSheetFromGroup(proposersDoc, 'Excluded CAs', assessors,
                                     excludedAssessors, ['assessments'])

        wrapper.createSheetFromGroup(proposersDoc, 'Included CAs', assessors,
                                     includedAssessors, ['assessments'])

        # Columns copied verbatim into sheet columns 1..6, in this order.
        copiedColumns = [
            opts.assessmentsIdColumn,
            opts.ideaURLColumn,
            opts.questionColumn,
            opts.ratingColumn,
            opts.assessorColumn,
            opts.assessmentColumn,
        ]
        # Any 'x' in these columns marks the review as flagged.
        flagColumns = [
            opts.profanityColumn,
            opts.scoreColumn,
            opts.copyColumn,
            opts.wrongChallengeColumn,
            opts.wrongCriteriaColumn,
            opts.otherColumn,
        ]

        index = 2
        print('Cloning flagged reviews...')
        for assessment in assessments:
            if (assessment[opts.assessorColumn] in excludedAssessors):
                continue
            flagged = any(assessment[col] == 'x' for col in flagColumns)
            marked = 'x' if flagged else ''
            cellsToAdd.extend(
                Cell(row=index, col=position, value=assessment[column])
                for position, column in enumerate(copiedColumns, start=1))
            cellsToAdd.append(Cell(row=index, col=7, value=marked))
            index += 1

        worksheet.update_cells(cellsToAdd, value_input_option='USER_ENTERED')
        print('Master Document for vCAs created')
        print('Link: {}'.format(spreadsheet.url))
コード例 #11
0
class createVCAAggregate():
    """Aggregate the marks of every vCA file into one new spreadsheet.

    The master file supplies the reference list of assessment ids. For each
    id the 'x' marks found in the vCA files are counted, yellow/red cards are
    derived from those counts, and two sheets ('Aggregated' and
    'Valid Assessments') are written to a freshly created document.
    """

    def __init__(self):
        """Create the helper objects and the lists of feedback columns."""
        self.options = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        # Columns whose 'x' marks are counted as infringements.
        self.infringementsColumns = [
            self.options.profanityColumn, self.options.scoreColumn,
            self.options.copyColumn, self.options.wrongChallengeColumn,
            self.options.wrongCriteriaColumn, self.options.otherColumn
        ]
        # Columns whose 'x' marks are counted as positive feedback.
        self.positiveColumns = [
            self.options.fairColumn, self.options.topQualityColumn
        ]
        # A row counts as reviewed when any of these columns is marked.
        self.feedbackColumns = self.infringementsColumns + self.positiveColumns

    def loadVCAsFiles(self):
        """Load the "Assessments" sheet of the master file and of each vCA file.

        Populates ``self.masterDataByIds`` with the grouped master records and
        ``self.vcaData`` with one grouped result per entry of
        ``options.VCAsFiles``, in the same order.
        """
        masterDocument = self.gspreadWrapper.gc.open_by_key(self.options.VCAMasterFile)
        masterSheet = masterDocument.worksheet("Assessments")
        masterData = masterSheet.get_all_records()
        # NOTE(review): groupById presumably returns a mapping keyed by
        # assessment id -- the rest of this class indexes it that way.
        self.masterDataByIds = self.gspreadWrapper.groupById(masterData)
        self.vcaData = []
        for vcaFile in self.options.VCAsFiles:
            vcaDocument = self.gspreadWrapper.gc.open_by_key(vcaFile)
            vcaSheet = vcaDocument.worksheet("Assessments")
            data = vcaSheet.get_all_records()
            self.vcaData.append(self.gspreadWrapper.groupById(data))

    def createDoc(self):
        """Create the aggregate spreadsheet and print its URL.

        Loads all input files, aggregates every assessment of the master
        file, writes the 'Aggregated' and 'Valid Assessments' sheets and
        finally deletes the first worksheet of the new document.
        """
        self.loadVCAsFiles()
        print('Create new document...')
        spreadsheet = self.gspreadWrapper.gc.create(self.options.VCAAggregateFileName)
        spreadsheet.share(
            self.options.accountEmail,
            perm_type='user',
            role='writer'
        )
        print('vCA aggregate document created.')

        print('Create sheet...')

        excludedAssessors = []
        yellowAssessments = []
        assessments = []
        # Loop over master ids as reference
        for assessmentId in self.masterDataByIds:
            assessment = self._aggregateAssessment(assessmentId)
            # A single red card excludes the assessor altogether.
            if assessment[self.options.redCardColumn] >= 1:
                excludedAssessors.append(assessment[self.options.assessorColumn])
            if assessment[self.options.yellowCardColumn] >= 1:
                yellowAssessments.append(assessment)
            assessments.append(assessment)

        validAssessments = self.filterAssessments(yellowAssessments, excludedAssessors)

        self.gspreadWrapper.createSheetFromList(
            spreadsheet,
            'Aggregated',
            assessments,
            []
        )

        self.gspreadWrapper.createSheetFromList(
            spreadsheet,
            'Valid Assessments',
            validAssessments,
            [
                self.options.proposerMarkColumn, self.options.fairColumn,
                self.options.topQualityColumn, self.options.profanityColumn,
                self.options.scoreColumn, self.options.copyColumn,
                self.options.wrongChallengeColumn, self.options.wrongCriteriaColumn,
                self.options.otherColumn, self.options.otherRationaleColumn
            ]
        )

        # Remove the first worksheet of the document now that the two
        # report sheets have been added.
        worksheet = spreadsheet.get_worksheet(0)
        spreadsheet.del_worksheet(worksheet)

        print('Aggregated document created')

        print('Link: {}'.format(spreadsheet.url))

    def _aggregateAssessment(self, assessmentId):
        """Count the vCA marks for one assessment and attach its cards.

        Returns a dict holding the assessment id, its assessor, the number of
        vCA reviews, one counter per feedback column and the yellow/red card
        totals. Key insertion order is kept stable on purpose.
        """
        opt = self.options
        assessment = {}
        assessment[opt.assessmentsIdColumn] = assessmentId
        assessment[opt.assessorColumn] = self.masterDataByIds[assessmentId][opt.assessorColumn]
        assessment[opt.noVCAReviewsColumn] = 0
        assessment[opt.fairColumn] = 0
        assessment[opt.topQualityColumn] = 0
        for col in self.infringementsColumns:
            assessment[col] = 0
        assessment[opt.yellowCardColumn] = 0
        assessment[opt.redCardColumn] = 0

        # Loop over all vca files
        for vcaFile in self.vcaData:
            # NOTE(review): this lookup fails if a vCA file lacks the id;
            # every vCA file is expected to contain all master ids.
            review = vcaFile[assessmentId]
            fair = self.checkIfMarked(review, opt.fairColumn)
            if not self.isVCAfeedbackValid(fair, review):
                # Contradictory feedback is ignored entirely.
                continue
            assessment[opt.noVCAReviewsColumn] += self.checkIfReviewed(review)
            assessment[opt.fairColumn] += fair
            assessment[opt.topQualityColumn] += self.checkIfMarked(review, opt.topQualityColumn)
            for col in self.infringementsColumns:
                assessment[col] += self.checkIfMarked(review, col)

        (yellow, red) = self.calculateCards(assessment)
        assessment[opt.yellowCardColumn] = yellow
        assessment[opt.redCardColumn] = red
        return assessment

    def checkIfReviewed(self, row):
        """Return 1 if any feedback column of ``row`` is marked 'x', else 0."""
        return 1 if any(row[col] == 'x' for col in self.feedbackColumns) else 0

    def checkIfMarked(self, row, column):
        """Return 1 if ``row[column]`` is marked 'x', else 0."""
        return 1 if row[column] == 'x' else 0

    def calculateCards(self, row):
        """Return ``(yellow, red)`` card counts for an aggregated row.

        Cards are only assigned when the row has at least
        ``options.minimumVCA`` reviews. A card is given when the share of
        reviews marking a column reaches that column's limit: profanity
        yields a red card, the other infringement columns a yellow card each.
        A row without any review never receives a card.
        """
        opt = self.options
        yellow = 0
        red = 0
        tot = row[opt.noVCAReviewsColumn]
        # 'tot > 0' prevents a ZeroDivisionError when minimumVCA is 0 (or
        # negative) and the assessment has not been reviewed at all.
        if tot > 0 and tot >= opt.minimumVCA:
            if (row[opt.profanityColumn] / tot) >= opt.profanityLimit:
                red += 1
            yellowRules = [
                (opt.scoreColumn, opt.scoreLimit),
                (opt.copyColumn, opt.copyLimit),
                (opt.wrongChallengeColumn, opt.wrongChallengeLimit),
                (opt.wrongCriteriaColumn, opt.wrongCriteriaLimit),
                (opt.otherColumn, opt.otherLimit),
            ]
            yellow = sum(
                1 for col, limit in yellowRules if (row[col] / tot) >= limit
            )
        return (yellow, red)

    def isVCAfeedbackValid(self, fairCount, row):
        """Return False when ``row`` is marked fair and also has an infringement.

        ``fairCount`` is the result of ``checkIfMarked`` for the fair column.
        Rows that are not marked fair are always considered valid.
        """
        if fairCount != 1:
            return True
        return not any(
            self.checkIfMarked(row, col) > 0 for col in self.infringementsColumns
        )

    def filterAssessments(self, yellowAssessments, excludedAssessors):
        """Return the master records that remain valid after the vCA review.

        A record is dropped when its id is among the ids returned by
        ``getRelatedTriplets(yellowAssessments)`` or when its assessor is
        listed in ``excludedAssessors``.
        """
        # Build both lookups once; membership is tested for every record.
        skippedIds = set(self.getRelatedTriplets(yellowAssessments))
        excluded = set(excludedAssessors)
        masterRecords = self.masterDataByIds
        return [
            masterRecords[assessmentId]
            for assessmentId in masterRecords
            if assessmentId not in skippedIds
            and masterRecords[assessmentId][self.options.assessorColumn] not in excluded
        ]

    def getRelatedTriplets(self, yellowAssessments):
        """Return the ids related to the yellow-carded assessments.

        Currently a stub: it always returns an empty list, so no assessment
        is filtered out because of a yellow card.
        """
        return []
コード例 #12
0
class CreateVCAMaster():
    """Create the master spreadsheet handed out to vCAs for review.

    The document is built from the proposers' flagged reviews: assessors with
    too many blank assessments (and those listed in
    ``opt.excludedCAProposers``) are left out, blank assessments are dropped
    and the proposers' detailed marks are cleared.
    """

    def __init__(self):
        """Create the option, utility and gspread helper objects."""
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Build the vCA master document and print its URL.

        Writes two sheets: 'Assessments' with the assessments to review and
        'Community Advisors' with a per-assessor summary of blank
        assessments.
        """
        spreadsheet = self.gspreadWrapper.createDoc(self.opt.VCAMasterFileName)

        # Define headings for VCAMasterFile
        print('Define headings...')
        headings = [
            self.opt.assessmentsIdCol, self.opt.tripletIdCol,
            self.opt.ideaURLCol, self.opt.proposalIdCol, self.opt.questionCol,
            self.opt.questionIdCol, self.opt.ratingCol, self.opt.assessorCol,
            self.opt.assessmentCol, self.opt.proposerMarkCol, self.opt.fairCol,
            self.opt.topQualityCol, self.opt.abstainCol, self.opt.strictCol,
            self.opt.lenientCol, self.opt.profanityCol,
            self.opt.nonConstructiveCol, self.opt.scoreCol, self.opt.copyCol,
            self.opt.incompleteReadingCol, self.opt.notRelatedCol,
            self.opt.otherCol, self.opt.otherRationaleCol
        ]

        print('Load proposers flagged reviews...')
        self.gspreadWrapper.getProposersData()
        proposersDf = self.gspreadWrapper.dfProposers

        # Per assessor: number of assessments and number marked blank ('x').
        assessors = proposersDf.groupby(self.opt.assessorCol).agg(
            total=(self.opt.assessmentCol, 'count'),
            blanks=(self.opt.blankCol, (lambda x: (x == 'x').sum())))

        # Flag assessors whose share of blank assessments reaches the limit.
        assessors['blankPercentage'] = assessors['blanks'] / assessors['total']
        assessors['excluded'] = (assessors['blankPercentage'] >=
                                 self.opt.allowedBlankPerAssessor)

        # Keep the non-flagged assessors, minus those that are also
        # proposers (taken from the options).
        includedAssessors = [
            assessor
            for assessor in assessors[~assessors['excluded']].index.tolist()
            if assessor not in self.opt.excludedCAProposers
        ]

        # Expose the index as a regular column so it can be written out.
        assessors['assessor'] = assessors.index

        # Filter out assessments made by excluded assessors
        validAssessments = proposersDf[
            proposersDf[self.opt.assessorCol].isin(includedAssessors)]

        # Filter out blank assessments. The explicit copy makes the column
        # assignments below act on an independent frame instead of a slice
        # of dfProposers (avoids pandas' SettingWithCopyWarning).
        validAssessments = validAssessments[
            validAssessments[self.opt.blankCol] != 'x'].copy()

        # Remove proposers marks
        criteria = self.gspreadWrapper.infringementsColumns + [
            self.opt.topQualityCol, self.opt.otherRationaleCol
        ]
        for col in criteria:
            validAssessments[col] = ''

        # Assign 'x' for marks: any truthy proposer mark becomes 'x'.
        validAssessments[self.opt.proposerMarkCol] = validAssessments[
            self.opt.proposerMarkCol].apply(lambda r: 'x' if (r) else '')

        # Write sheet with assessments
        assessmentsWidths = [('A', 40), ('B', 60), ('D', 40), ('E', 200),
                             ('F', 40), ('G', 60), ('H', 120), ('I', 400),
                             ('J:V', 30), ('W', 300)]
        assessmentsFormats = [
            ('G:G', self.utils.counterFormat),
            ('I:I', self.utils.noteFormat),
            ('J:V', self.utils.counterFormat),
            ('A1:W1', self.utils.headingFormat),
            ('B1', self.utils.verticalHeadingFormat),
            ('D1', self.utils.verticalHeadingFormat),
            ('F1:G1', self.utils.verticalHeadingFormat),
            ('J1:V1', self.utils.verticalHeadingFormat),
            ('K2:L', self.utils.greenFormat),
            ('P2:P', self.utils.redFormat),
            ('Q2:V', self.utils.yellowFormat),
        ]

        self.gspreadWrapper.createSheetFromDf(spreadsheet,
                                              'Assessments',
                                              validAssessments,
                                              headings,
                                              columnWidths=assessmentsWidths,
                                              formats=assessmentsFormats)

        # Write sheet with CAs summary
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Community Advisors',
            assessors,
            ['assessor', 'total', 'blanks', 'blankPercentage', 'excluded'],
            columnWidths=[('A', 140), ('B:D', 60), ('E', 100)],
            formats=[
                ('B:C', self.utils.counterFormat),
                ('D2:D', self.utils.percentageFormat),
                ('A1:E1', self.utils.headingFormat),
                ('B1:D1', self.utils.verticalHeadingFormat),
            ])
        print('Master Document for vCAs created')
        print('Link: {}'.format(spreadsheet.url))