def _processChoices(self, acronym_expansions): """ input: list(acronym expansion strings) returns: y_labels (list): of integer labels assigned to acronym expansions labelToExpansion (dict): to convert label number to acronym expansion """ y_labels = [] labelToExpansion = {} if(len(acronym_expansions) == 0): return y_labels, labelToExpansion y_labels = [index for index in range(len(acronym_expansions))] labelToExpansion[0] = acronym_expansions[0] for indexAhead in range(1, len(acronym_expansions)): new_expansion = acronym_expansions[indexAhead] newIsUnique = True # check if new_expansion is same as a previous expansion # if same assign previous label and move on for label, expansion in labelToExpansion.items(): if(AcronymExpansion.areExpansionsSimilar(expansion, new_expansion)): newIsUnique = False y_labels[indexAhead] = label break # if label is new indeed, then give it a label ID (integer) and # make an entry in the labelToExpansion dictionary if(newIsUnique): new_class_label = len(labelToExpansion) labelToExpansion[new_class_label] = new_expansion y_labels[indexAhead] = new_class_label return y_labels, labelToExpansion