def merge_answers(self, question, answers, merge_to=None):
    """
    Merge one or more answer values together, keeping the sequence of answers the same.

    References to the merged answers in criteria or caveats will be replaced with a
    reference to the 'merge_to' answer.

    examples:
    self.merge_answers(53, ['low-medium', 'medium'], merge_to='medium')
     will change all entries with 'low-medium' answer values to 'medium'.

    self.merge_answers(53, ['low-medium', 'medium'], merge_to='high')
     will change all entries with 'low-medium' or 'medium' answer values to 'high'.

    If merge_to is omitted, defaults to the first answer listed in the answers argument.
    self.merge_answers(53, ['low-medium', 'medium'])
     will change all entries with 'medium' answer values to 'low-medium'.

    In all cases the merged answers are subsequently deleted.

    The merge is announced on stdout and only carried out if ifinput() returns "y";
    otherwise nothing is modified.

    pandas was a really terrible choice, internally.

    :param question: key of the question in self._questions whose answers are merged
    :param answers: a single answer value (str) or a list of answer values to merge
    :param merge_to: answer value to merge into; defaults to the first entry of answers
    :return: None
    :raises IndexError: if answers is empty, or if an answer (or merge_to) yields no match
      in the question's valid answers (assumes indices() returns an empty list then)
    """
    if isinstance(answers, str):
        answers = [answers]

    cur = self._questions[question].valid_answers
    ans_ind = [indices(cur, lambda x: x == ans)[0] for ans in answers]
    if merge_to is None:
        merge_ind = ans_ind[0]
    else:
        merge_ind = indices(cur, lambda x: x == merge_to)[0]

    # old position -> new position. Built as a fresh list: assigning into the result
    # of range() only works where range() returns a list, and mutated the sequence
    # that was being iterated.
    mapping = [merge_ind if i in ans_ind else i for i in range(len(cur))]

    print("Merging answers into %s:" % cur[merge_ind])
    for i in ans_ind:
        print(" %s" % cur[i])
    if ifinput("Really continue?", "y") != "y":
        print("NOT merged.")
        return

    # Snapshot the texts to delete before deleting anything: if delete_answer edits
    # valid_answers in place (not visible from here), positions in cur would shift
    # after the first deletion.
    doomed = [cur[i] for i in ans_ind if i != merge_ind]

    new_cri, new_cav = self._remap_answers(question, mapping)
    self._criteria = new_cri
    self._caveats = new_cav

    for answer_text in doomed:
        self.delete_answer(question, answer_text)
def merge_answers(self, question, answers, merge_to=None):
    """
    Merge one or more answer values together, keeping the sequence of answers the same.

    References to the merged answers in criteria or caveats will be replaced with a
    reference to the 'merge_to' answer.

    examples:
    self.merge_answers(53, ['low-medium', 'medium'], merge_to='medium')
     will change all entries with 'low-medium' answer values to 'medium'.

    self.merge_answers(53, ['low-medium', 'medium'], merge_to='high')
     will change all entries with 'low-medium' or 'medium' answer values to 'high'.

    If merge_to is omitted, defaults to the first answer listed in the answers argument.
    self.merge_answers(53, ['low-medium', 'medium'])
     will change all entries with 'medium' answer values to 'low-medium'.

    In all cases the merged answers are subsequently deleted. The merge is announced
    on stdout and carried out without asking for confirmation.

    pandas was a really terrible choice, internally.

    :param question: key of the question in self._questions whose answers are merged
    :param answers: a single answer value (str) or a list of answer values to merge
    :param merge_to: answer value to merge into; defaults to the first entry of answers
    :return: None
    :raises IndexError: if answers is empty, or if an answer (or merge_to) yields no match
      in the question's valid answers (assumes indices() returns an empty list then)
    """
    if isinstance(answers, str):
        answers = [answers]

    cur = self._questions[question].valid_answers
    ans_ind = [indices(cur, lambda x: x == ans)[0] for ans in answers]
    if merge_to is None:
        merge_ind = ans_ind[0]
    else:
        merge_ind = indices(cur, lambda x: x == merge_to)[0]

    # old position -> new position. Built as a fresh list: assigning into the result
    # of range() only works where range() returns a list, and mutated the sequence
    # that was being iterated.
    mapping = [merge_ind if i in ans_ind else i for i in range(len(cur))]

    print('Merging answers into %s:' % cur[merge_ind])
    for i in ans_ind:
        print(' %s' % cur[i])

    # Snapshot the texts to delete before deleting anything: if delete_answer edits
    # valid_answers in place (not visible from here), positions in cur would shift
    # after the first deletion.
    doomed = [cur[i] for i in ans_ind if i != merge_ind]

    new_cri, new_cav = self._remap_answers(question, mapping)
    self._criteria = new_cri
    self._caveats = new_cav

    for answer_text in doomed:
        self.delete_answer(question, answer_text)
def _cri_for_record(self, index, record="question", answer=None):
    """
    Return a copy of the criteria rows that belong to one question or one target.

    :param index: QuestionID or TargetID to select on (see record)
    :param record: "question" (default) selects on the QuestionID column, "target" on
      the TargetID column
    :param answer: optional answer text, used for question queries only. When it matches
      exactly one of the question's valid answers, only rows whose Threshold is <= the
      position of that answer are kept. Otherwise a message is printed and no filtering
      takes place.
    :return: the selected rows, after self._replace_field_with_answer has been applied
    :raises KeyError: if record is neither "question" nor "target"
    """
    fieldname = {"question": "QuestionID", "target": "TargetID"}[record]
    criteria = self._criteria[self._criteria[fieldname] == index].copy()

    # TODO: make this return 'satisfies' entries as well
    if answer is not None:
        if record == "target":
            print("Ignoring answer value for target-based query")
        else:
            answer_value = indices(self._questions[index].valid_answers,
                                   lambda x: x == answer)
            if len(answer_value) != 1:
                print("Got spurious answer matching results: %s" % answer_value)
            else:
                # filter to only passing answers. Compare against the scalar position,
                # not the one-element list: pandas requires list-like operands to
                # match the column's length.
                criteria = criteria[criteria["Threshold"] <= answer_value[0]]

    # re-encode answer value with answer text (and call it Answer Value)
    self._replace_field_with_answer(criteria)
    return criteria
def _cri_for_record(self, index, record='question', answer=None):
    """
    Return a copy of the criteria rows that belong to one question or one target.

    :param index: QuestionID or TargetID to select on (see record)
    :param record: 'question' (default) selects on the QuestionID column, 'target' on
      the TargetID column
    :param answer: optional answer text, used for question queries only. When it matches
      exactly one of the question's valid answers, only rows whose Threshold is >= the
      position of that answer are kept. Otherwise a message is printed and no filtering
      takes place.
    :return: the selected rows, after self._replace_field_with_answer has been applied
    :raises KeyError: if record is neither 'question' nor 'target'
    """
    fieldname = {'question': 'QuestionID', 'target': 'TargetID'}[record]
    criteria = self._criteria[self._criteria[fieldname] == index].copy()

    # TODO: make this return 'satisfies' entries as well
    if answer is not None:
        if record == 'target':
            print('Ignoring answer value for target-based query')
        else:
            answer_value = indices(self._questions[index].valid_answers,
                                   lambda x: x == answer)
            if len(answer_value) != 1:
                print('Got spurious answer matching results: %s' % answer_value)
            else:
                # filter to only passing answers. Compare against the scalar position,
                # not the one-element list: pandas requires list-like operands to
                # match the column's length.
                criteria = criteria[criteria['Threshold'] >= answer_value[0]]

    # re-encode answer value with answer text (and call it Answer Value)
    self._replace_field_with_answer(criteria)
    return criteria
def __init__(self):
    """
    Build the data store from the default spreadsheet data set.

    Question keys (together with their synonyms) and target keys are given integer
    indices in sorted key order; the attribute, criteria and caveat mappings are
    gathered into pandas tables and handed to the parent constructor.

    :return:
    """
    sheet = load_default_set()  # TODO: make this flexible

    attribute_set = sheet.Attributes
    note_set = sheet.Notations

    # parallel column lists for the pandas tables assembled at the end
    t_a_targets, t_a_attrs = [], []
    q_a_questions, q_a_attrs = [], []
    cri_questions, cri_thresholds, cri_targets = [], [], []
    cav_questions, cav_targets, cav_answers, cav_notes = [], [], [], []

    # create mapping of dict keys to series: a key and its synonyms share one index
    q_dict = dict()
    q_count = 0
    for key in sorted(sheet.Questions.iterkeys()):
        if key in q_dict:
            # already in the dict- nothing to do
            continue
        q_dict[key] = q_count
        for syn in sheet.Questions[key].synonyms:
            q_dict[syn] = q_count
        q_count += 1

    # create target_enum; a target's index is its position in sorted key order
    target_enum = []
    t_dict = dict()
    for t_index, key in enumerate(sorted(sheet.Targets.iterkeys())):
        target = sheet.Targets[key]
        target_enum.append(MsspTarget(target))
        t_dict[key] = t_index
        for attr in target.attrs:
            t_a_targets.append(t_index)
            t_a_attrs.append(attribute_set.get_index(attr))

    # create question_enum: one empty question per distinct index
    question_enum = [MsspQuestion() for _ in range(q_count)]

    # populate question_enum, criteria, caveats
    for key, entry in sheet.Questions.iteritems():
        q_i = q_dict[key]
        question_enum[q_i].append(entry, q_dict)
        my_valid_answers = [cast_answer(va) for va in question_enum[q_i].valid_answers]

        for attr in entry.attrs:
            q_a_questions.append(q_i)
            q_a_attrs.append(attribute_set.get_index(attr))

        # criteria_mappings is (cross-index, element)
        for cross_index, element in entry.criteria_mappings:
            t_i = t_dict[(entry.selector, cross_index)]  # lookup target index
            answer = cast_answer(element.text)  # convert to 'Yes' / 'No' if applicable
            found = indices(my_valid_answers, lambda z: answer == z)
            if len(found) == 0:
                print("QuestionID {0}, TargetID {1}, text {2}: no threshold found.".format(
                    q_i, t_i, element.text))
                print(" valid answers: %s" % my_valid_answers)
                threshold = None
            else:
                threshold = found[0]
            cri_questions.append(q_i)
            cri_targets.append(t_i)
            cri_thresholds.append(threshold)

        # caveat_mappings is (answer, (cross-index, element))
        for raw_answer, (cross_index, element) in entry.caveat_mappings:
            t_i = t_dict[(entry.selector, cross_index)]  # lookup target index
            answer = cast_answer(raw_answer)  # convert to 'Yes' / 'No' if applicable
            found = indices(my_valid_answers, lambda z: answer == z)
            n_i = note_set.get_index(element)
            cav_questions.append(q_i)
            cav_targets.append(t_i)
            if len(found) > 0:
                cav_answers.append(found[0])
            else:
                print("QuestionID {0}, TargetID {1}, cast answer '{2}' unparsed.".format(
                    q_i, t_i, answer))
                print(" valid answers: %s" % my_valid_answers)
                cav_answers.append(None)
            cav_notes.append(n_i)

    # create pandas tables
    question_attributes = pd.DataFrame(
        {"QuestionID": q_a_questions, "AttributeID": q_a_attrs}).drop_duplicates()
    target_attributes = pd.DataFrame(
        {"TargetID": t_a_targets, "AttributeID": t_a_attrs})
    criteria = pd.DataFrame(
        {"QuestionID": cri_questions, "Threshold": cri_thresholds, "TargetID": cri_targets})
    caveats = pd.DataFrame(
        {"QuestionID": cav_questions, "TargetID": cav_targets,
         "Answer": cav_answers, "NoteID": cav_notes})

    super(XlsImporter, self).__init__(
        attribute_set, note_set, question_enum, target_enum,
        question_attributes, target_attributes, criteria, caveats,
        sheet.colormap)
def __init__(self, file_ref):
    """
    Constructs an MsspDataStore object from a collection of JSON dictionaries.

    Targets and questions keep the IDs stored in the JSON because criteria and caveat
    records refer to them by ID. Criteria thresholds and caveat answers arrive as
    answer text and are converted to positions in the owning question's valid_answers
    (None when the text is not found).

    :param file_ref: file or directory containing the json data
    :return: an MsspDataStore
    """
    json_in = read_json(file_ref)

    # first thing to do is build the attribute and note lists
    colormap = pd.DataFrame(json_in['colormap'])
    attribute_set = SemanticElementSet.from_json(json_in['attributes'])
    note_set = SemanticElementSet.from_json(json_in['notes'], colormap=colormap)

    # ID-addressed slots; IDs that never appear in the JSON stay None
    question_enum = [None] * (1 + max(q['QuestionID'] for q in json_in['questions']))
    target_enum = [None] * (1 + max(t['TargetID'] for t in json_in['targets']))

    # parallel column lists for the pandas tables assembled at the end
    t_a_targets, t_a_attrs = [], []
    q_a_questions, q_a_attrs = [], []
    cri_questions, cri_thresholds, cri_targets = [], [], []
    cav_questions, cav_targets, cav_answers, cav_notes = [], [], [], []

    for t in json_in['targets']:
        # need to preserve IDs because of criteria and caveat maps
        t_index = t['TargetID']
        try:
            target_enum[t_index] = MsspTarget.from_json(t)
        except IndexError:
            print('Index error at {0}'.format(t_index))
            print(t)
        # each attribute is the text form of a UUID
        for attr_text in t['Attributes']:
            t_a_targets.append(t_index)
            t_a_attrs.append(uuid.UUID(attr_text))

    for q in json_in['questions']:
        # need to preserve IDs because of criteria and caveat maps
        q_index = q['QuestionID']
        question_enum[q_index] = MsspQuestion.from_json(q)
        # each attribute is the text form of a UUID
        for attr_text in q['Attributes']:
            q_a_questions.append(q_index)
            q_a_attrs.append(uuid.UUID(attr_text))

    for cri in json_in['criteria']:
        # the threshold is a literal entry from the question's valid_answers-
        # needs to be converted into an index
        q_index = cri['QuestionID']
        t_index = cri['TargetID']
        found = indices(question_enum[q_index].valid_answers,
                        lambda k: cri['Threshold'] == k)
        if len(found) == 0:
            print("QuestionID {0}, TargetID {1}, text {2}: no threshold found.".format(
                q_index, t_index, cri['Threshold']))
            threshold = None
        else:
            threshold = found[0]
        cri_questions.append(q_index)
        cri_targets.append(t_index)
        cri_thresholds.append(threshold)

    old_style_announced = False
    for cav in json_in['caveats']:
        # the answer is a literal entry from the question's valid answers-
        # needs to be converted into an index.
        q_index = cav['QuestionID']
        t_index = cav['TargetID']

        if question_enum[q_index] is None:
            print("Question {0} is none! Skipping this entry".format(q_index))
            continue

        if 'Answers' in cav:
            # new way: one caveat record carries a list of answer entries
            for ans in cav['Answers']:
                if 'NoteID' not in ans:
                    # NOTE(review): entries without a NoteID produce no caveat row;
                    # nesting inferred from the flattened source -- confirm
                    continue
                note_id = uuid.UUID(ans['NoteID'])
                found = indices(question_enum[q_index].valid_answers,
                                lambda k: ans['Answer'] == k)
                if len(found) == 0:
                    print("QuestionID {0}, TargetID {1}, valid answer '{2}' unparsed.".format(
                        q_index, t_index, ans['Answer']))
                    found = [None]
                cav_questions.append(q_index)
                cav_targets.append(t_index)
                cav_answers.append(found[0])
                cav_notes.append(note_id)
            continue

        # old way: the caveat record itself holds a single Answer / NoteID pair
        if not old_style_announced:
            print('Loading old-style Caveats')
            old_style_announced = True
        note_id = uuid.UUID(cav['NoteID'])
        if question_enum[q_index] is None:
            # NOTE(review): cannot fire -- None questions are skipped at the top of
            # the loop; kept to mirror the original branch
            print("Question {0} is none!".format(q_index))
            found = [None]
        else:
            found = indices(question_enum[q_index].valid_answers,
                            lambda k: cav['Answer'] == k)
            if len(found) == 0:
                print("QuestionID {0}, TargetID {1}, valid answer '{2}' unparsed.".format(
                    q_index, t_index, cav['Answer']))
                found = [None]
        cav_questions.append(q_index)
        cav_targets.append(t_index)
        cav_answers.append(found[0])
        cav_notes.append(note_id)

    # create pandas tables
    question_attributes = pd.DataFrame(
        {"QuestionID": q_a_questions, "AttributeID": q_a_attrs}).drop_duplicates()
    target_attributes = pd.DataFrame(
        {"TargetID": t_a_targets, "AttributeID": t_a_attrs})
    criteria = pd.DataFrame(
        {"QuestionID": cri_questions, "Threshold": cri_thresholds, "TargetID": cri_targets})
    caveats = pd.DataFrame(
        {"QuestionID": cav_questions, "TargetID": cav_targets,
         "Answer": cav_answers, "NoteID": cav_notes})

    super(JsonImporter, self).__init__(
        attribute_set, note_set, question_enum, target_enum,
        question_attributes, target_attributes, criteria, caveats,
        colormap)
def __init__(self):
    """
    Build the data store from the default spreadsheet data set.

    The spreadsheet's attribute and notation sets are converted with
    SemanticElementSet.from_element_set. Question keys (together with their synonyms)
    and target keys are given integer indices in sorted key order; the attribute,
    criteria and caveat mappings are gathered into pandas tables and handed to the
    parent constructor.

    :return:
    """
    sheet = load_default_set()  # TODO: make this flexible

    attribute_set = SemanticElementSet.from_element_set(sheet.Attributes)
    note_set = SemanticElementSet.from_element_set(sheet.Notations)

    # parallel column lists for the pandas tables assembled at the end
    t_a_targets, t_a_attrs = [], []
    q_a_questions, q_a_attrs = [], []
    cri_questions, cri_thresholds, cri_targets = [], [], []
    cav_questions, cav_targets, cav_answers, cav_notes = [], [], [], []

    # create mapping of dict keys to series: a key and its synonyms share one index
    q_dict = dict()
    q_count = 0
    for key in sorted(sheet.Questions.iterkeys()):
        if key in q_dict:
            # already in the dict- nothing to do
            continue
        q_dict[key] = q_count
        for syn in sheet.Questions[key].synonyms:
            q_dict[syn] = q_count
        q_count += 1

    # create target_enum; a target's index is its position in sorted key order
    target_enum = []
    t_dict = dict()
    for t_index, key in enumerate(sorted(sheet.Targets.iterkeys())):
        target = sheet.Targets[key]
        target_enum.append(MsspTarget(target))
        t_dict[key] = t_index
        for attr in target.attrs:
            t_a_targets.append(t_index)
            t_a_attrs.append(attribute_set.get_index(attr))

    # create question_enum: one empty question per distinct index
    question_enum = [MsspQuestion() for _ in range(q_count)]

    # populate question_enum, criteria, caveats
    for key, entry in sheet.Questions.iteritems():
        q_i = q_dict[key]
        question_enum[q_i].append(entry, q_dict)
        my_valid_answers = [cast_answer(va) for va in question_enum[q_i].valid_answers]

        for attr in entry.attrs:
            q_a_questions.append(q_i)
            q_a_attrs.append(attribute_set.get_index(attr))

        # criteria_mappings is (cross-index, element)
        for cross_index, element in entry.criteria_mappings:
            t_i = t_dict[(entry.selector, cross_index)]  # lookup target index
            answer = cast_answer(element.text)  # convert to 'Yes' / 'No' if applicable
            found = indices(my_valid_answers, lambda z: answer == z)
            if len(found) == 0:
                print("QuestionID {0}, TargetID {1}, text {2}: no threshold found.".format(
                    q_i, t_i, element.text))
                print(" valid answers: %s" % my_valid_answers)
                threshold = None
            else:
                threshold = found[0]
            cri_questions.append(q_i)
            cri_targets.append(t_i)
            cri_thresholds.append(threshold)

        # caveat_mappings is (answer, (cross-index, element))
        for raw_answer, (cross_index, element) in entry.caveat_mappings:
            t_i = t_dict[(entry.selector, cross_index)]  # lookup target index
            answer = cast_answer(raw_answer)  # convert to 'Yes' / 'No' if applicable
            found = indices(my_valid_answers, lambda z: answer == z)
            n_i = note_set.get_index(element)
            cav_questions.append(q_i)
            cav_targets.append(t_i)
            if len(found) > 0:
                cav_answers.append(found[0])
            else:
                print("QuestionID {0}, TargetID {1}, cast answer '{2}' unparsed.".format(
                    q_i, t_i, answer))
                print(" valid answers: %s" % my_valid_answers)
                cav_answers.append(None)
            cav_notes.append(n_i)

    # create pandas tables
    question_attributes = pd.DataFrame(
        {"QuestionID": q_a_questions, "AttributeID": q_a_attrs}).drop_duplicates()
    target_attributes = pd.DataFrame(
        {"TargetID": t_a_targets, "AttributeID": t_a_attrs})
    criteria = pd.DataFrame(
        {"QuestionID": cri_questions, "Threshold": cri_thresholds, "TargetID": cri_targets})
    caveats = pd.DataFrame(
        {"QuestionID": cav_questions, "TargetID": cav_targets,
         "Answer": cav_answers, "NoteID": cav_notes})

    super(XlsImporter, self).__init__(
        attribute_set, note_set, question_enum, target_enum,
        question_attributes, target_attributes, criteria, caveats,
        sheet.colormap)
def __init__(self, file_ref):
    """
    Constructs an MsspDataStore object from a collection of JSON dictionaries.

    Attribute and note element sets are built up while reading: every attribute text
    and every caveat note is passed to add_element and the value it returns is
    recorded. Targets and questions keep the IDs stored in the JSON. Criteria
    thresholds and caveat answers arrive as answer text and are converted to positions
    in the owning question's valid_answers (None when the text is not found).

    :param file_ref: file or directory containing the json data
    :return: an MsspDataStore
    """
    json_in = read_json(file_ref)

    # first thing to do is build the attribute and note lists
    attribute_set = ElementSet()
    note_set = ElementSet()
    colormap = pd.DataFrame(json_in['colormap'])

    # ID-addressed slots; IDs that never appear in the JSON stay None
    question_enum = [None] * (1 + max(q['QuestionID'] for q in json_in['questions']))
    target_enum = [None] * (1 + max(t['TargetID'] for t in json_in['targets']))

    # parallel column lists for the pandas tables assembled at the end
    t_a_targets, t_a_attrs = [], []
    q_a_questions, q_a_attrs = [], []
    cri_questions, cri_thresholds, cri_targets = [], [], []
    cav_questions, cav_targets, cav_answers, cav_notes = [], [], [], []

    for t in json_in['targets']:
        # need to preserve IDs because of criteria and caveat maps
        t_index = t['TargetID']
        try:
            target_enum[t_index] = MsspTarget.from_json(t)
        except IndexError:
            print('Index error at {0}'.format(t_index))
            print(t)
        # add _attributes to element set and build mapping
        for attr_text in t['Attributes']:
            # don't care about attribute colors
            t_a_targets.append(t_index)
            t_a_attrs.append(attribute_set.add_element(Element(attr_text)))

    for q in json_in['questions']:
        # need to preserve IDs because of criteria and caveat maps
        q_index = q['QuestionID']
        question_enum[q_index] = MsspQuestion.from_json(q)
        # add _attributes to element set and build mapping
        for attr_text in q['Attributes']:
            # don't care about attribute colors
            q_a_questions.append(q_index)
            q_a_attrs.append(attribute_set.add_element(Element(attr_text)))

    for cri in json_in['criteria']:
        # the threshold is a literal entry from the question's valid_answers-
        # needs to be converted into an index
        q_index = cri['QuestionID']
        t_index = cri['TargetID']
        found = indices(question_enum[q_index].valid_answers,
                        lambda k: cri['Threshold'] == k)
        if len(found) == 0:
            print("QuestionID {0}, TargetID {1}, text {2}: no threshold found.".format(
                q_index, t_index, cri['Threshold']))
            threshold = None
        else:
            threshold = found[0]
        cri_questions.append(q_index)
        cri_targets.append(t_index)
        cri_thresholds.append(threshold)

    for cav in json_in['caveats']:
        # the answer is a literal entry from the question's valid answers-
        # needs to be converted into an index.
        q_index = cav['QuestionID']
        t_index = cav['TargetID']

        # the color needs to be converted into a colormap RGB.
        color_rows = colormap[colormap['ColorName'] == cav['Color']]
        rgb = color_rows['RGB'].iloc[0]
        # add to note set or find if exists
        note_id = note_set.add_element(Element(cav['Note'], 0, rgb))

        answer_index = None
        if question_enum[q_index] is None:
            print("Question {0} is none!".format(q_index))
        else:
            found = indices(question_enum[q_index].valid_answers,
                            lambda k: cav['Answer'] == k)
            if len(found) == 0:
                print("QuestionID {0}, TargetID {1}, valid answer '{2}' unparsed.".format(
                    q_index, t_index, cav['Answer']))
            else:
                answer_index = found[0]

        cav_questions.append(q_index)
        cav_targets.append(t_index)
        cav_answers.append(answer_index)
        cav_notes.append(note_id)

    # create pandas tables
    question_attributes = pd.DataFrame(
        {"QuestionID": q_a_questions, "AttributeID": q_a_attrs}).drop_duplicates()
    target_attributes = pd.DataFrame(
        {"TargetID": t_a_targets, "AttributeID": t_a_attrs})
    criteria = pd.DataFrame(
        {"QuestionID": cri_questions, "Threshold": cri_thresholds, "TargetID": cri_targets})
    caveats = pd.DataFrame(
        {"QuestionID": cav_questions, "TargetID": cav_targets,
         "Answer": cav_answers, "NoteID": cav_notes})

    super(JsonImporter, self).__init__(
        attribute_set, note_set, question_enum, target_enum,
        question_attributes, target_attributes, criteria, caveats,
        colormap)
def __init__(self, file_ref):
    """
    Constructs an MsspDataStore object from a collection of JSON dictionaries.

    Attribute and note element sets are built up while reading: every attribute text
    and every caveat note is passed to add_element and the value it returns is
    recorded. Targets and questions keep the IDs stored in the JSON. Criteria
    thresholds and caveat answers arrive as answer text and are converted to positions
    in the owning question's valid_answers (None when the text is not found).

    :param file_ref: file or directory containing the json data
    :return: an MsspDataStore
    """
    json_in = read_json(file_ref)

    # first thing to do is build the attribute and note lists
    attribute_set = ElementSet()
    note_set = ElementSet()
    colormap = pd.DataFrame(json_in['colormap'])

    # ID-addressed slots; IDs that never appear in the JSON stay None
    highest_question_id = max(q['QuestionID'] for q in json_in['questions'])
    highest_target_id = max(t['TargetID'] for t in json_in['targets'])
    question_enum = [None] * (1 + highest_question_id)
    target_enum = [None] * (1 + highest_target_id)

    # parallel column lists for the pandas tables assembled at the end
    t_a_targets, t_a_attrs = [], []
    q_a_questions, q_a_attrs = [], []
    cri_questions, cri_thresholds, cri_targets = [], [], []
    cav_questions, cav_targets, cav_answers, cav_notes = [], [], [], []

    for t in json_in['targets']:
        # need to preserve IDs because of criteria and caveat maps
        t_index = t['TargetID']
        try:
            target_enum[t_index] = MsspTarget.from_json(t)
        except IndexError:
            print('Index error at {0}'.format(t_index))
            print(t)
        # add _attributes to element set and build mapping
        for attr_text in t['Attributes']:
            # don't care about attribute colors
            attr_index = attribute_set.add_element(Element(attr_text))
            t_a_targets.append(t_index)
            t_a_attrs.append(attr_index)

    for q in json_in['questions']:
        # need to preserve IDs because of criteria and caveat maps
        q_index = q['QuestionID']
        question_enum[q_index] = MsspQuestion.from_json(q)
        # add _attributes to element set and build mapping
        for attr_text in q['Attributes']:
            # don't care about attribute colors
            attr_index = attribute_set.add_element(Element(attr_text))
            q_a_questions.append(q_index)
            q_a_attrs.append(attr_index)

    for cri in json_in['criteria']:
        # the threshold is a literal entry from the question's valid_answers-
        # needs to be converted into an index
        q_index = cri['QuestionID']
        t_index = cri['TargetID']
        found = indices(question_enum[q_index].valid_answers,
                        lambda k: cri['Threshold'] == k)
        if len(found) == 0:
            print("QuestionID {0}, TargetID {1}, text {2}: no threshold found.".format(
                q_index, t_index, cri['Threshold']))
            found = [None]
        cri_questions.append(q_index)
        cri_targets.append(t_index)
        cri_thresholds.append(found[0])

    for cav in json_in['caveats']:
        # the answer is a literal entry from the question's valid answers-
        # needs to be converted into an index.
        q_index = cav['QuestionID']
        t_index = cav['TargetID']

        # the color needs to be converted into a colormap RGB.
        matching_colors = colormap[colormap['ColorName'] == cav['Color']]
        rgb = matching_colors['RGB'].iloc[0]
        # add to note set or find if exists
        note_id = note_set.add_element(Element(cav['Note'], 0, rgb))

        owner = question_enum[q_index]
        if owner is None:
            print("Question {0} is none!".format(q_index))
            found = [None]
        else:
            found = indices(owner.valid_answers, lambda k: cav['Answer'] == k)
            if len(found) == 0:
                print("QuestionID {0}, TargetID {1}, valid answer '{2}' unparsed.".format(
                    q_index, t_index, cav['Answer']))
                found = [None]

        cav_questions.append(q_index)
        cav_targets.append(t_index)
        cav_answers.append(found[0])
        cav_notes.append(note_id)

    # create pandas tables
    question_attributes = pd.DataFrame(
        {"QuestionID": q_a_questions, "AttributeID": q_a_attrs}).drop_duplicates()
    target_attributes = pd.DataFrame(
        {"TargetID": t_a_targets, "AttributeID": t_a_attrs})
    criteria = pd.DataFrame(
        {"QuestionID": cri_questions, "Threshold": cri_thresholds, "TargetID": cri_targets})
    caveats = pd.DataFrame(
        {"QuestionID": cav_questions, "TargetID": cav_targets,
         "Answer": cav_answers, "NoteID": cav_notes})

    super(JsonImporter, self).__init__(
        attribute_set, note_set, question_enum, target_enum,
        question_attributes, target_attributes, criteria, caveats,
        colormap)