def __init__(self):
    """Create the helper objects and define the column groups.

    Every group is a list of column names taken from ``Options``.
    """
    self.opt = Options()
    self.utils = Utils()
    self.gspreadWrapper = GspreadWrapper()

    # Columns that flag a problem with an assessment.
    self.infringementsColumns = [
        self.opt.profanityCol,
        self.opt.nonConstructiveCol,
        self.opt.scoreCol,
        self.opt.copyCol,
        self.opt.incompleteReadingCol,
        self.opt.notRelatedCol,
        self.opt.otherCol,
    ]
    self.positiveColumns = [self.opt.fairCol, self.opt.topQualityCol]
    # lenient/strict are kept apart from the neutral group as indicators.
    self.neutralColumns = [self.opt.abstainCol]
    self.indicatorColumns = [self.opt.lenientCol, self.opt.strictCol]

    # Same as the infringements minus profanity, which is the only red column.
    self.yellowColumns = [
        self.opt.nonConstructiveCol,
        self.opt.scoreCol,
        self.opt.copyCol,
        self.opt.incompleteReadingCol,
        self.opt.notRelatedCol,
        self.opt.otherCol,
    ]
    self.redColumns = [self.opt.profanityCol]

    self.feedbackColumns = self.infringementsColumns + self.positiveColumns
    self.allColumns = (
        self.infringementsColumns
        + self.positiveColumns
        + self.neutralColumns
        + self.indicatorColumns
    )
class CreateProposerDocument():
    """Copy the original export and add the proposers' review columns."""

    def __init__(self):
        self.options = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    @staticmethod
    def _isBlank(value):
        """Return True when *value* is None, empty or whitespace-only.

        The value is stringified first so that cells delivered as numbers do
        not raise ``AttributeError`` on ``.strip()``.
        """
        if value is None:
            return True
        return str(value).strip() == ''

    def createDoc(self):
        """Create the proposers' document and print its link.

        Copies the original export, appends the review heading columns to the
        assessments sheet, and puts an ``'x'`` in the first added column for
        every blank assessment.
        """
        print('Loading original...')
        self.gspreadWrapper.loadAssessmentsFile()

        print('Make a new copy...')
        spreadsheet = self.gspreadWrapper.gc.copy(
            self.options.originalExportFromIdeascale,
            title=self.options.proposerDocumentName
        )
        spreadsheet.share(
            self.options.accountEmail,
            perm_type='user',
            role='writer'
        )
        worksheet = spreadsheet.worksheet(self.options.assessmentsSheet)

        print('Setting headings for report...')
        # New columns are appended to the right of the existing ones.
        currentColsCount = worksheet.col_count
        headings = [
            self.options.blankColumn,
            self.options.topQualityColumn,
            self.options.profanityColumn,
            self.options.scoreColumn,
            self.options.copyColumn,
            self.options.wrongChallengeColumn,
            self.options.wrongCriteriaColumn,
            self.options.otherColumn,
            self.options.otherRationaleColumn
        ]
        worksheet.add_cols(len(headings))

        print('Set column width...')
        set_column_widths(worksheet, [
            ('J:Q', 40),
            ('R', 200)
        ])

        cellsToAdd = [
            Cell(row=1, col=col, value=value)
            for col, value in enumerate(headings, start=currentColsCount + 1)
        ]

        print('Mark blank assessments...')
        # The blank marker always goes in the first appended column.
        blankCol = currentColsCount + 1
        assessments = self.gspreadWrapper.getAssessmentsData()
        for note in assessments:
            if self._isBlank(note[self.options.assessmentColumn]):
                # NOTE(review): the assessment id is used as the sheet row
                # number -- confirm ids really match row positions.
                cellsToAdd.append(
                    Cell(
                        row=note[self.options.assessmentsIdColumn],
                        col=blankCol,
                        value='x'
                    )
                )

        worksheet.update_cells(cellsToAdd, value_input_option='USER_ENTERED')
        print('Document for proposers created')
        print('Link: {}'.format(spreadsheet.url))
def __init__(self):
    """Create the helpers, reset the result containers and load vCA data."""
    self.opt = Options()
    self.utils = Utils()
    self.gspreadWrapper = GspreadWrapper()

    # Result containers and the score threshold.
    self.assessors = dict()
    self.similarities = list()
    self.similarityMinScore = 0.5

    print('Load vca data...')
    self.gspreadWrapper.getVCAMasterData()

    self.initSimilarity()
def __init__(self):
    """Create the helper objects and the feedback column groups."""
    opts = Options()
    self.options = opts
    self.utils = Utils()
    self.gspreadWrapper = GspreadWrapper()

    infringements = [
        opts.profanityColumn,
        opts.scoreColumn,
        opts.copyColumn,
        opts.wrongChallengeColumn,
        opts.wrongCriteriaColumn,
        opts.otherColumn,
    ]
    positives = [opts.fairColumn, opts.topQualityColumn]

    self.infringementsColumns = infringements
    self.positiveColumns = positives
    # Concatenation builds a new list, independent of the two above.
    self.feedbackColumns = infringements + positives
class CreateProposerDocument():
    """Build the proposers' document from the prepared export DataFrame."""

    def __init__(self):
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Create the "Assessments" sheet for proposers and print its link."""
        opt = self.opt
        wrapper = self.gspreadWrapper
        utils = self.utils

        print('Loading original...')
        wrapper.loadAssessmentsFile()
        proposerDf = wrapper.prepareDataFromExport()
        spreadsheet = wrapper.createDoc(opt.proposerDocumentName)

        # Every column written to the sheet, in order.
        headings = [
            opt.proposalKeyCol,
            opt.ideaURLCol,
            opt.questionCol,
            opt.assessorCol,
            opt.assessmentCol,
            opt.ratingCol,
            opt.assessmentsIdCol,
            opt.tripletIdCol,
            opt.proposalIdCol,
            opt.blankCol,
            opt.topQualityCol,
            opt.profanityCol,
            opt.nonConstructiveCol,
            opt.scoreCol,
            opt.copyCol,
            opt.incompleteReadingCol,
            opt.notRelatedCol,
            opt.otherCol,
            opt.otherRationaleCol,
        ]

        print('Assign blanks...')

        def blankMarker(text):
            # 'x' for an assessment that is empty once stripped.
            if text.strip() == "":
                return 'x'
            return ''

        proposerDf[opt.blankCol] = proposerDf[opt.assessmentCol].apply(
            blankMarker)

        print('Format columns...')
        widths = [
            ('A:D', 150),
            ('E', 400),
            ('F', 60),
            ('G:R', 30),
            ('S', 400),
        ]
        formats = [
            ('F:R', utils.counterFormat),
            ('A1:S1', utils.headingFormat),
            ('F1:R1', utils.verticalHeadingFormat),
            ('K2:K', utils.greenFormat),
            ('L2:L', utils.redFormat),
            ('M2:R', utils.yellowFormat),
            ('A2:E', utils.textFormat),
        ]

        wrapper.createSheetFromDf(
            spreadsheet, "Assessments", proposerDf, headings, widths, formats)

        print('Document for proposers created')
        print('Link: {}'.format(spreadsheet.url))
class CreateSimilarity():
    """Find pairs of similar assessment notes and report them in a sheet."""

    def __init__(self):
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()
        print('Load vca data...')
        self.gspreadWrapper.getVCAMasterData()

        self.assessors = {}
        self.similarities = []
        self.similarityMinScore = 0.5

        self.initSimilarity()

    def createDoc(self):
        """Run the analysis, cache it as CSV and publish it as a spreadsheet.

        After this call ``self.assessors`` is a list of per-assessor rows
        whose ``similarity_other_assessors`` is a comma-joined string.
        """
        import os

        self.findSimilarity()

        similarityHeadings = [
            'id A', 'id B', 'Assessor A', 'Assessor B', 'Note A', 'Note B',
            'Similarity Score'
        ]
        assessorHeadings = [
            'Assessor', 'similarity_other_assessors',
            'similarity_count_others', 'similarity_count_self'
        ]

        for k in self.assessors:
            # dict.fromkeys de-duplicates while keeping first-seen order.
            self.assessors[k]['similarity_other_assessors'] = ','.join(
                dict.fromkeys(self.assessors[k]['similarity_other_assessors']))
        self.assessors = list(self.assessors.values())

        # Passing the columns explicitly keeps the headings present even when
        # no pair crossed the threshold.
        dfSimilarities = pd.DataFrame(
            self.similarities, columns=similarityHeadings)
        dfAssessors = pd.DataFrame(self.assessors, columns=assessorHeadings)

        # The cache directory may not exist on a fresh checkout.
        os.makedirs('cache', exist_ok=True)
        dfSimilarities.to_csv('cache/sim5.csv')
        dfAssessors.to_csv('cache/sim_ass5.csv')

        spreadsheet = self.gspreadWrapper.createDoc('Similarity Analysis')
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Assessments',
            dfSimilarities,
            similarityHeadings,
            columnWidths=[('A:B', 50), ('C:D', 150), ('E:F', 300), ('G', 60)],
            formats=[('G', self.utils.counterFormat),
                     ('A1:G1', self.utils.headingFormat)])

        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'CAs',
            dfAssessors,
            assessorHeadings,
            columnWidths=[('A:B', 150), ('C:D', 60)],
            formats=[
                ('C:D', self.utils.counterFormat),
                ('A1:D1', self.utils.headingFormat),
                ('C1:D1', self.utils.verticalHeadingFormat),
            ])
        # Drop the first worksheet of the new document.
        worksheet = spreadsheet.get_worksheet(0)
        spreadsheet.del_worksheet(worksheet)
        print('Link: {}'.format(spreadsheet.url))

    def initSimilarity(self):
        """Install the vectorizer and the pairwise similarity callables."""
        self.vectorize = lambda Text: TfidfVectorizer().fit_transform(
            Text).toarray()
        self.similarity = lambda doc1, doc2: cosine_similarity([doc1, doc2])

    def _creditAssessor(self, assessor, other):
        """Record on *assessor* one similar pair shared with *other*."""
        entry = self.assessors.setdefault(assessor, {
            'Assessor': assessor,
            'similarity_other_assessors': [],
            'similarity_count_others': 0,
            'similarity_count_self': 0
        })
        if other == assessor:
            entry['similarity_count_self'] += 1
        else:
            entry['similarity_other_assessors'].append(other)
            entry['similarity_count_others'] += 1

    def findSimilarity(self):
        """Score every unordered pair of notes and record those above the limit.

        Appends one row to ``self.similarities`` for each pair whose score
        exceeds ``self.similarityMinScore`` and updates ``self.assessors``
        for both assessors of the pair.
        """
        data = self.gspreadWrapper.dfVca
        notes = list(data[self.opt.assessmentCol])
        ids = list(data[self.opt.assessmentsIdCol])
        authors = list(data[self.opt.assessorCol])
        if not notes:
            # Nothing to vectorize.
            return

        vectors = self.vectorize(notes)
        total = len(ids)

        # Pairs are visited by position, each exactly once, so rows sharing
        # an id cannot be confused with one another.
        for first in range(total):
            print("{} of {}".format(first, total))
            for second in range(first + 1, total):
                sim_score = self.similarity(
                    vectors[first], vectors[second])[0][1]
                if not (sim_score > self.similarityMinScore):
                    continue

                # Report the pair with the smaller id as "A".
                low, high = first, second
                if ids[high] < ids[low]:
                    low, high = high, low
                ass_0, ass_1 = authors[low], authors[high]

                self._creditAssessor(ass_0, ass_1)
                if ass_0 != ass_1:
                    # Both assessors of a cross-assessor pair are credited.
                    self._creditAssessor(ass_1, ass_0)

                self.similarities.append({
                    'id A': ids[low],
                    'id B': ids[high],
                    'Assessor A': ass_0,
                    'Assessor B': ass_1,
                    'Note A': notes[low],
                    'Note B': notes[high],
                    'Similarity Score': sim_score
                })
def __init__(self):
    """Create the options object and the spreadsheet wrapper."""
    # The right-hand side is evaluated left to right: Options() first.
    self.options, self.gspreadWrapper = Options(), GspreadWrapper()
class createVCAAggregate():
    """Aggregate the vCA review files into the final results document."""

    def __init__(self):
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        # Columns that flag a problem with an assessment.
        self.infringementsColumns = [
            self.opt.profanityCol,
            self.opt.nonConstructiveCol,
            self.opt.scoreCol,
            self.opt.copyCol,
            self.opt.incompleteReadingCol,
            self.opt.notRelatedCol,
            self.opt.otherCol
        ]
        self.positiveColumns = [self.opt.fairCol, self.opt.topQualityCol]
        self.neutralColumns = [self.opt.abstainCol]
        self.indicatorColumns = [self.opt.lenientCol, self.opt.strictCol]
        # Columns counted towards a yellow card; profanity is the red one.
        self.yellowColumns = [
            self.opt.nonConstructiveCol,
            self.opt.scoreCol,
            self.opt.copyCol,
            self.opt.incompleteReadingCol,
            self.opt.notRelatedCol,
            self.opt.otherCol
        ]
        self.redColumns = [self.opt.profanityCol]
        self.feedbackColumns = (
            self.infringementsColumns + self.positiveColumns)
        self.allColumns = (
            self.infringementsColumns + self.positiveColumns
            + self.neutralColumns + self.indicatorColumns)

    def prepareBaseData(self):
        """Load the master data and zero every counter column."""
        self.gspreadWrapper.getVCAMasterData()
        self.dfVca = self.gspreadWrapper.dfVca.set_index('id')
        # Set all counters to 0
        self.dfVca[self.opt.noVCAReviewsCol] = 0
        self.dfVca[self.opt.yellowCardCol] = 0
        self.dfVca[self.opt.redCardCol] = 0
        for col in self.allColumns:
            self.dfVca[col] = 0

        self.gspreadWrapper.getVCAMasterAssessors()
        self.gspreadWrapper.dfVcaAssessors[self.opt.yellowCardCol] = 0
        self.gspreadWrapper.dfVcaAssessors[self.opt.redCardCol] = 0

        self.gspreadWrapper.getProposersData()

    def loadVCAsFiles(self):
        """Open every vCA file and keep its records indexed by ``id``."""
        self.prepareBaseData()
        self.vcaData = []
        self.vcaDocs = []
        for vcaFile in self.opt.VCAsFiles:
            print(vcaFile)
            vcaDocument = self.gspreadWrapper.gc.open_by_key(vcaFile)
            try:
                vcaSheet = vcaDocument.worksheet("Assessments")
            except Exception:
                # Fall back to the first sheet when "Assessments" is not
                # available; KeyboardInterrupt/SystemExit still propagate.
                vcaSheet = vcaDocument.get_worksheet(0)
            data = pd.DataFrame(vcaSheet.get_all_records())
            data.set_index('id', inplace=True)
            self.vcaData.append(data)
            self.vcaDocs.append(vcaDocument)

    def createDoc(self):
        """Count the vCA reviews, assign cards and write the aggregate doc."""
        import os

        self.loadVCAsFiles()

        # Loop over master ids as reference
        for assessmentId, row in self.dfVca.iterrows():
            # Loop over all vca files
            for vcaDf in self.vcaData:
                if assessmentId not in vcaDf.index:
                    continue
                locAss = vcaDf.loc[assessmentId]
                if self.checkIntegrity(assessmentId, row, locAss) is False:
                    # NOTE(review): this stops reading the remaining vCA
                    # files for this assessment only -- confirm that is the
                    # intended reaction to a mismatch.
                    print('Error')
                    break
                good = self.goodFeedback(locAss)
                bad = self.badFeedback(locAss)
                neutral = self.neutralFeedback(locAss)
                if not self.isVCAfeedbackValid(locAss, good, bad, neutral):
                    continue
                if good or bad:
                    self.dfVca.loc[assessmentId,
                                   self.opt.noVCAReviewsCol] += 1
                # NOTE(review): nesting reconstructed -- marks are tallied
                # for every valid review, not only good/bad ones.
                for col in self.allColumns:
                    colVal = self.checkIfMarked(locAss, col)
                    if colVal > 0:
                        self.dfVca.loc[assessmentId, col] += colVal

            (yellow, red) = self.calculateCards(self.dfVca.loc[assessmentId])
            self.dfVca.loc[assessmentId, self.opt.yellowCardCol] = yellow
            self.dfVca.loc[assessmentId, self.opt.redCardCol] = red

        # Extract red card assessors and update List
        redCards = self.dfVca[self.dfVca[self.opt.redCardCol] > 0]
        self.redCardsAssessors = list(redCards[self.opt.assessorCol].unique())

        # Select valid assessments (no red card assessors, no yellow card
        # assessments). Copy so the column assignments below write to an
        # independent frame instead of a slice of self.dfVca.
        validAssessments = self.dfVca[(
            (self.dfVca[self.opt.yellowCardCol] == 0) &
            ~self.dfVca[self.opt.assessorCol].isin(self.redCardsAssessors)
        )].copy()
        validAssessments[self.opt.assessmentsIdCol] = validAssessments.index

        # Add Proposal title (getting it from Proposer doc)
        validAssessments[self.opt.proposalKeyCol] = validAssessments.apply(
            lambda x: str(self.gspreadWrapper.dfProposers.loc[
                self.gspreadWrapper.dfProposers[self.opt.assessmentsIdCol] ==
                x[self.opt.assessmentsIdCol],
                self.opt.proposalKeyCol].iloc[0]),
            axis=1)

        # create group for final scores
        finalProposals = validAssessments.groupby(
            self.opt.proposalKeyCol,
            as_index=False)[self.opt.ratingCol].mean()

        # generate Assessor Recap
        assessors = self.assessorRecap()

        # generate nonValidAssessment recap
        nonValidAssessments = self.nonValidAssessments(validAssessments)

        # Generate Doc
        os.makedirs('cache', exist_ok=True)
        validAssessments.to_csv('cache/valid.csv')
        nonValidAssessments.to_csv('cache/non-valid.csv')
        assessors.to_csv('cache/assessors.csv')

        spreadsheet = self.gspreadWrapper.createDoc(
            self.opt.VCAAggregateFileName)

        # Print valid assessments
        assessmentsHeadings = [
            self.opt.assessmentsIdCol, self.opt.tripletIdCol,
            self.opt.proposalKeyCol, self.opt.ideaURLCol,
            self.opt.proposalIdCol, self.opt.questionCol,
            self.opt.questionIdCol, self.opt.ratingCol, self.opt.assessorCol,
            self.opt.assessmentCol, self.opt.proposerMarkCol,
            self.opt.fairCol, self.opt.topQualityCol, self.opt.abstainCol,
            self.opt.strictCol, self.opt.lenientCol, self.opt.profanityCol,
            self.opt.nonConstructiveCol, self.opt.scoreCol, self.opt.copyCol,
            self.opt.incompleteReadingCol, self.opt.notRelatedCol,
            self.opt.otherCol, self.opt.noVCAReviewsCol,
            self.opt.yellowCardCol, self.opt.redCardCol
        ]
        assessmentsWidths = [('A', 40), ('B', 60), ('C', 120), ('D', 120),
                             ('E', 40), ('F', 200), ('G', 40), ('H', 60),
                             ('I', 120), ('J', 400), ('K:Z', 30)]
        assessmentsFormats = [
            ('H:H', self.utils.counterFormat),
            ('J:J', self.utils.noteFormat),
            ('K:Z', self.utils.counterFormat),
            ('A1:X1', self.utils.headingFormat),
            ('B1', self.utils.verticalHeadingFormat),
            ('E1', self.utils.verticalHeadingFormat),
            ('G1:H1', self.utils.verticalHeadingFormat),
            ('K1:Z1', self.utils.verticalHeadingFormat),
            ('L2:L', self.utils.greenFormat),
            ('Q2:Q', self.utils.redFormat),
            ('R2:W', self.utils.yellowFormat),
            ('Y2:Y', self.utils.yellowFormat),
            ('Z2:Z', self.utils.redFormat),
        ]
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Valid Assessments',
            validAssessments,
            assessmentsHeadings,
            columnWidths=assessmentsWidths,
            formats=assessmentsFormats)

        # Write sheet with CAs summary
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Community Advisors',
            assessors, [
                'assessor', 'total', 'blanks', 'blankPercentage', 'excluded',
                'Yellow Card', 'Red Card', 'Constructive Feedback', 'note'
            ],
            columnWidths=[('A', 140), ('B:D', 60), ('E', 100), ('F:H', 40),
                          ('I', 200)],
            formats=[
                ('B:C', self.utils.counterFormat),
                ('F:H', self.utils.counterFormat),
                ('D2:D', self.utils.percentageFormat),
                ('A1:E1', self.utils.headingFormat),
                ('B1:D1', self.utils.verticalHeadingFormat),
                ('F1:H1', self.utils.verticalHeadingFormat),
                ('F2:F', self.utils.yellowFormat),
                ('G2:G', self.utils.redFormat),
                ('H2:H', self.utils.greenFormat),
            ])

        # Print non valid assessments -> reason, extract from proposer Doc
        nonAssessmentsHeadings = [
            self.opt.assessmentsIdCol, self.opt.tripletIdCol,
            self.opt.ideaURLCol, self.opt.proposalIdCol, self.opt.questionCol,
            self.opt.ratingCol, self.opt.assessorCol, self.opt.assessmentCol,
            self.opt.blankCol, self.opt.proposerMarkCol, self.opt.fairCol,
            self.opt.topQualityCol, self.opt.abstainCol, self.opt.strictCol,
            self.opt.lenientCol, self.opt.profanityCol,
            self.opt.nonConstructiveCol, self.opt.scoreCol, self.opt.copyCol,
            self.opt.incompleteReadingCol, self.opt.notRelatedCol,
            self.opt.otherCol, self.opt.noVCAReviewsCol,
            self.opt.yellowCardCol, self.opt.redCardCol, 'reason'
        ]
        nonAssessmentsWidths = [('A', 40), ('B', 60), ('C', 120), ('D', 40),
                                ('E', 200), ('F', 40), ('G', 120), ('H', 400),
                                ('I:Y', 30), ('Z', 200)]
        nonAssessmentsFormats = [
            ('G:G', self.utils.counterFormat),
            ('H:H', self.utils.noteFormat),
            ('I:Y', self.utils.counterFormat),
            ('A1:W1', self.utils.headingFormat),
            ('B1', self.utils.verticalHeadingFormat),
            ('D1', self.utils.verticalHeadingFormat),
            ('F1:G1', self.utils.verticalHeadingFormat),
            ('I1:Y1', self.utils.verticalHeadingFormat),
            ('K2:L', self.utils.greenFormat),
            ('P2:P', self.utils.redFormat),
            ('Q2:V', self.utils.yellowFormat),
            ('X2:X', self.utils.yellowFormat),
            ('Y2:Y', self.utils.redFormat),
        ]
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Excluded Assessments',
            nonValidAssessments,
            nonAssessmentsHeadings,
            columnWidths=nonAssessmentsWidths,
            formats=nonAssessmentsFormats)

        # Print vca recap
        allVcas = [
            {'title': vcaDoc.title, 'link': vcaDoc.url}
            for vcaDoc in self.vcaDocs
        ]
        allVcasDf = pd.DataFrame(allVcas)
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet, 'Veteran Community Advisors', allVcasDf)

        proposalsWidths = [('A', 300), ('B', 60)]
        proposalsFormats = [('B:B', self.utils.counterFormat),
                            ('A:A', self.utils.noteFormat),
                            ('A1:B1', self.utils.headingFormat),
                            ('B1', self.utils.verticalHeadingFormat)]
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Proposals',
            finalProposals, [self.opt.proposalKeyCol, self.opt.ratingCol],
            columnWidths=proposalsWidths,
            formats=proposalsFormats)

        print('Aggregated Document created')
        print('Link: {}'.format(spreadsheet.url))

    def nonValidAssessments(self, validAssessments):
        """Return the proposer rows absent from *validAssessments*.

        Counter columns are copied from ``self.dfVca`` where the id exists
        there, and a ``reason`` column is added.
        """
        self.gspreadWrapper.getProposersData()
        dfProposers = self.gspreadWrapper.dfProposers.set_index('id')
        dfProposers[self.opt.assessmentsIdCol] = dfProposers.index
        for col in self.allColumns:
            dfProposers[col] = ''
        dfProposers[self.opt.proposerMarkCol] = ''
        dfProposers[self.opt.otherRationaleCol] = ''
        excluded = dfProposers[~dfProposers[
            self.opt.assessmentsIdCol].isin(validAssessments.index)].copy()

        counterCols = list(self.allColumns) + [
            self.opt.yellowCardCol,
            self.opt.redCardCol,
            self.opt.noVCAReviewsCol,
        ]
        for assessmentId, _ in excluded.iterrows():
            if assessmentId in self.dfVca.index:
                for col in counterCols:
                    excluded.loc[assessmentId, col] = int(
                        self.dfVca.loc[assessmentId, col])

        excluded['reason'] = excluded.apply(self.describeReason, axis=1)
        excluded.fillna('', inplace=True)
        return excluded

    def describeReason(self, row):
        """Return a comma-separated explanation of why *row* was excluded."""
        reason = []
        if (row[self.opt.blankCol] == 'x'):
            reason.append(self.opt.blankCol)
        # The assessment id is the row label: 'id' was moved to the index,
        # so it is read from row.name rather than from a column.
        if row.name in self.dfVca.index:
            tot = row[self.opt.noVCAReviewsCol]
            if tot > 0:
                for col in self.infringementsColumns:
                    if (row[col] / tot) >= self.opt.cardLimit:
                        reason.append(col)
        assessorsDf = self.gspreadWrapper.dfVcaAssessors
        excludedAssessors = list(
            assessorsDf[assessorsDf['excluded'] == 'TRUE']['assessor'])
        if (row[self.opt.assessorCol] in excludedAssessors):
            reason.append('Assessor excluded')

        return ', '.join(reason)

    def assessorRecap(self):
        """Update and return the assessors frame with card totals."""
        assessorsDf = self.gspreadWrapper.dfVcaAssessors
        redMask = assessorsDf['assessor'].isin(self.redCardsAssessors)
        assessorsDf.loc[redMask, 'excluded'] = 'TRUE'
        assessorsDf.loc[redMask, 'note'] = "red card"

        # Extract assessors
        locAssessors = self.dfVca.groupby(self.opt.assessorCol).agg(
            constructiveFeedback=(self.opt.topQualityCol, 'sum'),
            red=(self.opt.redCardCol, 'sum'),
            yellow=(self.opt.yellowCardCol, 'sum'),
        )
        for assessor, row in locAssessors.iterrows():
            mask = assessorsDf['assessor'] == assessor
            assessorsDf.loc[mask, self.opt.redCardCol] = row['red']
            assessorsDf.loc[mask, self.opt.yellowCardCol] = row['yellow']
            assessorsDf.loc[mask, self.opt.topQualityCol] = row[
                'constructiveFeedback']

        assessorsDf.fillna('', inplace=True)
        return assessorsDf

    def checkIntegrity(self, id, ass1, ass2):
        """Return False (and log) when two copies of an assessment differ."""
        comparedCols = (
            self.opt.proposalIdCol,
            self.opt.questionIdCol,
            self.opt.ratingCol,
            self.opt.assessorCol,
        )
        if any(ass1[col] != ass2[col] for col in comparedCols):
            print("Something wrong with assessment {}".format(id))
            return False
        return True

    def checkIfMarked(self, row, column):
        """Return 1 when ``row[column]`` holds a non-blank mark, else 0.

        None and NaN count as unmarked. Other non-string values are
        stringified, so a numeric mark no longer raises on ``.strip()``.
        """
        value = row[column]
        if value is None:
            return 0
        if isinstance(value, float) and value != value:  # NaN
            return 0
        if str(value).strip() != '':
            return 1
        return 0

    def calculateCards(self, row):
        """Return ``(yellow, red)`` card counts for an aggregated row."""
        yellow = 0
        red = 0
        tot = row[self.opt.noVCAReviewsCol]
        # tot > 0 protects the ratios when minimumVCA is configured as 0.
        if tot > 0 and tot >= self.opt.minimumVCA:
            if ((row[self.opt.profanityCol] / tot) >= self.opt.cardLimit):
                red = red + 1
            for col in self.yellowColumns:
                if ((row[col] / tot) >= self.opt.cardLimit):
                    yellow = yellow + 1
        return (yellow, red)

    def goodFeedback(self, row):
        """Return True when any positive column is marked."""
        return any(
            self.checkIfMarked(row, col) > 0 for col in self.positiveColumns)

    def badFeedback(self, row):
        """Return True when any infringement column is marked."""
        return any(
            self.checkIfMarked(row, col) > 0
            for col in self.infringementsColumns)

    def badValid(self, row):
        """Return False when "other" is marked without a rationale."""
        if ((self.checkIfMarked(row, self.opt.otherCol) == 1) and
                (self.checkIfMarked(row, self.opt.otherRationaleCol) == 0)):
            return False
        return True

    def neutralFeedback(self, row):
        """Return True when any neutral column is marked."""
        return any(
            self.checkIfMarked(row, col) > 0 for col in self.neutralColumns)

    def isVCAfeedbackValid(self, row, good, bad, neutral):
        """Return True when at most one of good/bad/neutral is set.

        A bad review must also pass ``badValid``.
        """
        if bad and self.badValid(row) is False:
            return False
        return sum([good, bad, neutral]) <= 1
def __init__(self):
    """Create the options, utilities and spreadsheet wrapper objects."""
    # The right-hand side is evaluated left to right, so the constructors
    # run in the order Options, Utils, GspreadWrapper.
    self.opt, self.utils, self.gspreadWrapper = (
        Options(),
        Utils(),
        GspreadWrapper(),
    )
class CreateVCAMaster():
    """Create the master review document handed to the vCAs."""

    def __init__(self):
        self.options = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Create the master sheet and fill it with the kept assessments.

        Assessors whose blank percentage reaches the allowed limit are
        listed in the proposers' document and their assessments are left
        out of the master sheet.
        """
        opts = self.options
        wrapper = self.gspreadWrapper

        print('Create new document...')
        spreadsheet = wrapper.gc.create(opts.VCAMasterFileName)
        spreadsheet.share(opts.accountEmail, perm_type='user', role='writer')

        print('Create sheet...')
        worksheet = spreadsheet.get_worksheet(0)
        worksheet.update_title("Assessments")

        # Set headings
        print('Set headings...')
        headings = [
            opts.assessmentsIdColumn,
            opts.ideaURLColumn,
            opts.questionColumn,
            opts.ratingColumn,
            opts.assessorColumn,
            opts.assessmentColumn,
            opts.proposerMarkColumn,
            opts.fairColumn,
            opts.topQualityColumn,
            opts.profanityColumn,
            opts.scoreColumn,
            opts.copyColumn,
            opts.wrongChallengeColumn,
            opts.wrongCriteriaColumn,
            opts.otherColumn,
            opts.otherRationaleColumn,
        ]
        cellsToAdd = [
            Cell(row=1, col=position, value=title)
            for position, title in enumerate(headings, start=1)
        ]

        print('Set column width...')
        set_column_widths(worksheet, [
            ('A', 40),
            ('B:C', 200),
            ('D', 40),
            ('E', 120),
            ('F', 400),
            ('G:O', 30),
            ('P', 300),
        ])

        print('Format columns')
        noteFormat = cellFormat(wrapStrategy='CLIP')
        flagFormat = cellFormat(
            textFormat=textFormat(bold=True),
            horizontalAlignment='CENTER')
        format_cell_ranges(worksheet, [
            ('D:D', flagFormat),
            ('F:F', noteFormat),
            ('G:O', flagFormat),
        ])

        print('Load proposers flagged reviews...')
        assessments = wrapper.getProposersData()
        # extract Assessors
        assessors = wrapper.groupByAssessor(assessments)

        # Split assessors on the allowed share of blank reviews.
        limit = opts.allowedBlankPerAssessor
        excludedAssessors = []
        includedAssessors = []
        for name in assessors:
            share = assessors[name]['blankPercentage']
            if share >= limit:
                excludedAssessors.append(name)
            if share < limit:
                includedAssessors.append(name)

        # Add sheets for excluded/included assessors
        proposersDoc = wrapper.gc.open_by_key(opts.proposersFile)
        wrapper.createSheetFromGroup(
            proposersDoc, 'Excluded CAs', assessors, excludedAssessors,
            ['assessments'])
        wrapper.createSheetFromGroup(
            proposersDoc, 'Included CAs', assessors, includedAssessors,
            ['assessments'])

        print('Cloning flagged reviews...')
        flagColumns = [
            opts.profanityColumn,
            opts.scoreColumn,
            opts.copyColumn,
            opts.wrongChallengeColumn,
            opts.wrongCriteriaColumn,
            opts.otherColumn,
        ]
        copiedColumns = [
            opts.assessmentsIdColumn,
            opts.ideaURLColumn,
            opts.questionColumn,
            opts.ratingColumn,
            opts.assessorColumn,
            opts.assessmentColumn,
        ]
        # Row 1 holds the headings, so data starts on row 2.
        nextRow = 2
        for assessment in assessments:
            if assessment[opts.assessorColumn] in excludedAssessors:
                continue
            if any(assessment[flag] == 'x' for flag in flagColumns):
                marked = 'x'
            else:
                marked = ''
            for position, name in enumerate(copiedColumns, start=1):
                cellsToAdd.append(
                    Cell(row=nextRow, col=position, value=assessment[name]))
            cellsToAdd.append(Cell(row=nextRow, col=7, value=marked))
            nextRow += 1

        worksheet.update_cells(cellsToAdd, value_input_option='USER_ENTERED')
        print('Master Document for vCAs created')
        print('Link: {}'.format(spreadsheet.url))
class createVCAAggregate():
    """Aggregate vCA review files into per-assessment counters and cards."""

    def __init__(self):
        self.options = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        self.infringementsColumns = [
            self.options.profanityColumn,
            self.options.scoreColumn,
            self.options.copyColumn,
            self.options.wrongChallengeColumn,
            self.options.wrongCriteriaColumn,
            self.options.otherColumn
        ]
        self.positiveColumns = [
            self.options.fairColumn,
            self.options.topQualityColumn
        ]
        self.feedbackColumns = (
            self.infringementsColumns + self.positiveColumns)

    def loadVCAsFiles(self):
        """Load the master sheet and every vCA sheet, grouped by id."""
        masterDocument = self.gspreadWrapper.gc.open_by_key(
            self.options.VCAMasterFile)
        masterSheet = masterDocument.worksheet("Assessments")
        masterData = masterSheet.get_all_records()
        self.masterDataByIds = self.gspreadWrapper.groupById(masterData)
        self.vcaData = []
        for vcaFile in self.options.VCAsFiles:
            vcaDocument = self.gspreadWrapper.gc.open_by_key(vcaFile)
            vcaSheet = vcaDocument.worksheet("Assessments")
            data = vcaSheet.get_all_records()
            self.vcaData.append(self.gspreadWrapper.groupById(data))

    def createDoc(self):
        """Build the aggregated document and print its link."""
        self.loadVCAsFiles()
        print('Create new document...')
        spreadsheet = self.gspreadWrapper.gc.create(
            self.options.VCAAggregateFileName)
        spreadsheet.share(
            self.options.accountEmail,
            perm_type='user',
            role='writer'
        )
        print('vCA aggregate document created.')
        print('Create sheet...')

        excludedAssessors = []
        yellowAssessments = []
        assessments = []
        # Loop over master ids as reference
        for id in self.masterDataByIds:
            assessment = {}
            assessment[self.options.assessmentsIdColumn] = id
            assessment[self.options.assessorColumn] = (
                self.masterDataByIds[id][self.options.assessorColumn])
            assessment[self.options.noVCAReviewsColumn] = 0
            assessment[self.options.fairColumn] = 0
            assessment[self.options.topQualityColumn] = 0
            for col in self.infringementsColumns:
                assessment[col] = 0
            assessment[self.options.yellowCardColumn] = 0
            assessment[self.options.redCardColumn] = 0

            # Loop over all vca files
            for vcaFile in self.vcaData:
                # A vCA file may not contain every master id; skip it rather
                # than fail with KeyError.
                # assumes groupById returns a mapping keyed by id -- TODO confirm
                if id not in vcaFile:
                    continue
                review = vcaFile[id]
                fair = self.checkIfMarked(review, self.options.fairColumn)
                if not self.isVCAfeedbackValid(fair, review):
                    continue
                assessment[self.options.noVCAReviewsColumn] += (
                    self.checkIfReviewed(review))
                assessment[self.options.fairColumn] += fair
                assessment[self.options.topQualityColumn] += (
                    self.checkIfMarked(review, self.options.topQualityColumn))
                for col in self.infringementsColumns:
                    assessment[col] += self.checkIfMarked(review, col)

            (yellow, red) = self.calculateCards(assessment)
            assessment[self.options.yellowCardColumn] = yellow
            assessment[self.options.redCardColumn] = red
            if (red >= 1):
                excludedAssessors.append(
                    assessment[self.options.assessorColumn])
            if (yellow >= 1):
                yellowAssessments.append(assessment)
            assessments.append(assessment)

        validAssessments = self.filterAssessments(
            yellowAssessments, excludedAssessors)

        self.gspreadWrapper.createSheetFromList(
            spreadsheet,
            'Aggregated',
            assessments,
            []
        )
        self.gspreadWrapper.createSheetFromList(
            spreadsheet,
            'Valid Assessments',
            validAssessments,
            [
                self.options.proposerMarkColumn,
                self.options.fairColumn,
                self.options.topQualityColumn,
                self.options.profanityColumn,
                self.options.scoreColumn,
                self.options.copyColumn,
                self.options.wrongChallengeColumn,
                self.options.wrongCriteriaColumn,
                self.options.otherColumn,
                self.options.otherRationaleColumn
            ]
        )
        # Drop the first worksheet of the new document.
        worksheet = spreadsheet.get_worksheet(0)
        spreadsheet.del_worksheet(worksheet)
        print('Aggregated document created')
        print('Link: {}'.format(spreadsheet.url))

    def checkIfReviewed(self, row):
        """Return 1 when any feedback column of *row* is ``'x'``, else 0."""
        if any(row[col] == 'x' for col in self.feedbackColumns):
            return 1
        return 0

    def checkIfMarked(self, row, column):
        """Return 1 when ``row[column]`` is ``'x'``, else 0."""
        if (row[column] == 'x'):
            return 1
        return 0

    def calculateCards(self, row):
        """Return ``(yellow, red)`` card counts for an aggregated row.

        Each infringement count is compared, as a share of the number of
        reviews, with its own limit from the options.
        """
        yellow = 0
        red = 0
        tot = row[self.options.noVCAReviewsColumn]
        # tot > 0 avoids ZeroDivisionError when minimumVCA is set to 0.
        if tot > 0 and tot >= self.options.minimumVCA:
            if ((row[self.options.profanityColumn] / tot)
                    >= self.options.profanityLimit):
                red = red + 1
            yellowLimits = (
                (self.options.scoreColumn, self.options.scoreLimit),
                (self.options.copyColumn, self.options.copyLimit),
                (self.options.wrongChallengeColumn,
                 self.options.wrongChallengeLimit),
                (self.options.wrongCriteriaColumn,
                 self.options.wrongCriteriaLimit),
                (self.options.otherColumn, self.options.otherLimit),
            )
            for col, limit in yellowLimits:
                if (row[col] / tot) >= limit:
                    yellow = yellow + 1
        return (yellow, red)

    def isVCAfeedbackValid(self, fairCount, row):
        """Return False when a review is marked fair and also infringing."""
        if (fairCount == 1):
            for col in self.infringementsColumns:
                if (self.checkIfMarked(row, col) > 0):
                    return False
        return True

    def filterAssessments(self, yellowAssessments, excludedAssessors):
        """Return master assessments not excluded by cards or assessor."""
        yellowRelatedTripletsIds = self.getRelatedTriplets(yellowAssessments)
        assessments = self.masterDataByIds
        return [
            assessments[id]
            for id in assessments
            if id not in yellowRelatedTripletsIds
            and assessments[id][self.options.assessorColumn]
            not in excludedAssessors
        ]

    def getRelatedTriplets(self, yellowAssessments):
        """Return the ids related to yellow-card assessments.

        Currently always returns an empty list.
        """
        return []
class CreateVCAMaster():
    """Create the master review document handed to the vCAs."""

    def __init__(self):
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Write the "Assessments" and "Community Advisors" sheets.

        Assessments are kept only when their assessor is below the allowed
        blank percentage, is not listed in ``excludedCAProposers``, and the
        assessment itself is not blank. Proposer marks on the individual
        criteria are cleared before writing.
        """
        spreadsheet = self.gspreadWrapper.createDoc(self.opt.VCAMasterFileName)

        # Define headings for VCAMasterFile
        print('Define headings...')
        headings = [
            self.opt.assessmentsIdCol, self.opt.tripletIdCol,
            self.opt.ideaURLCol, self.opt.proposalIdCol, self.opt.questionCol,
            self.opt.questionIdCol, self.opt.ratingCol, self.opt.assessorCol,
            self.opt.assessmentCol, self.opt.proposerMarkCol,
            self.opt.fairCol, self.opt.topQualityCol, self.opt.abstainCol,
            self.opt.strictCol, self.opt.lenientCol, self.opt.profanityCol,
            self.opt.nonConstructiveCol, self.opt.scoreCol, self.opt.copyCol,
            self.opt.incompleteReadingCol, self.opt.notRelatedCol,
            self.opt.otherCol, self.opt.otherRationaleCol
        ]

        print('Load proposers flagged reviews...')
        self.gspreadWrapper.getProposersData()
        dfProposers = self.gspreadWrapper.dfProposers

        # Extract assessors
        assessors = dfProposers.groupby(self.opt.assessorCol).agg(
            total=(self.opt.assessmentCol, 'count'),
            blanks=(self.opt.blankCol, (lambda x: (x == 'x').sum())))

        # Calculate and extract assessors by blanks
        assessors['blankPercentage'] = assessors['blanks'] / assessors['total']
        assessors['excluded'] = (
            assessors['blankPercentage'] >= self.opt.allowedBlankPerAssessor)
        includedAssessors = assessors[~assessors['excluded']].index.tolist()

        # Exclude assessors that are also proposers (get from options)
        # NOTE(review): these assessors are filtered out below but their
        # 'excluded' flag in the summary sheet stays False -- confirm whether
        # the flag should be set for them as well.
        includedAssessors = [
            x for x in includedAssessors
            if (x not in self.opt.excludedCAProposers)
        ]

        assessors['assessor'] = assessors.index

        # Keep assessments by included assessors that are not blank. The
        # explicit copy makes the column assignments below write to an
        # independent frame instead of a slice of dfProposers.
        validAssessments = dfProposers[
            dfProposers[self.opt.assessorCol].isin(includedAssessors)]
        validAssessments = validAssessments[
            validAssessments[self.opt.blankCol] != 'x'].copy()

        # Remove proposers marks
        criteria = self.gspreadWrapper.infringementsColumns + [
            self.opt.topQualityCol, self.opt.otherRationaleCol
        ]
        for col in criteria:
            validAssessments[col] = ''

        # Assign 'x' for marks
        validAssessments[self.opt.proposerMarkCol] = validAssessments[
            self.opt.proposerMarkCol].apply(lambda r: 'x' if (r) else '')

        # Write sheet with assessments
        assessmentsWidths = [('A', 40), ('B', 60), ('D', 40), ('E', 200),
                             ('F', 40), ('G', 60), ('H', 120), ('I', 400),
                             ('J:V', 30), ('W', 300)]
        assessmentsFormats = [
            ('G:G', self.utils.counterFormat),
            ('I:I', self.utils.noteFormat),
            ('J:V', self.utils.counterFormat),
            ('A1:W1', self.utils.headingFormat),
            ('B1', self.utils.verticalHeadingFormat),
            ('D1', self.utils.verticalHeadingFormat),
            ('F1:G1', self.utils.verticalHeadingFormat),
            ('J1:V1', self.utils.verticalHeadingFormat),
            ('K2:L', self.utils.greenFormat),
            ('P2:P', self.utils.redFormat),
            ('Q2:V', self.utils.yellowFormat),
        ]

        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Assessments',
            validAssessments,
            headings,
            columnWidths=assessmentsWidths,
            formats=assessmentsFormats)

        # Write sheet with CAs summary
        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Community Advisors',
            assessors,
            ['assessor', 'total', 'blanks', 'blankPercentage', 'excluded'],
            columnWidths=[('A', 140), ('B:D', 60), ('E', 100)],
            formats=[
                ('B:C', self.utils.counterFormat),
                ('D2:D', self.utils.percentageFormat),
                ('A1:E1', self.utils.headingFormat),
                ('B1:D1', self.utils.verticalHeadingFormat),
            ])
        print('Master Document for vCAs created')
        print('Link: {}'.format(spreadsheet.url))