def mapSequences(rows):
    """Yield keyed model counts, or gold-standard sequences, for each row.

    Each row is indexed as ``(key, records)``: ``row[0]`` must expose at least
    two elements and ``row[1]`` is an iterable of indexable records with at
    least seven fields.  Records are walked in ``(record[6], record[2])``
    order, chaining states from the synthetic ``"START"`` state.

    When ``utils.isTest()`` is falsy, two pairs are yielded per record:

    * ``((previous, current, transitionType), 1)``
    * ``((previous + "_" + current, record[3], emissionType), 1)``

    Otherwise a single pair is yielded per row:
    ``((row[0][0], row[0][1], goldType), goldStandard)``, where
    ``goldStandard`` lists one
    ``(previous, current, emissionKey, record[3], record[4], record[5])``
    tuple per record.
    """
    for row in rows:
        # sorted() returns a new list; the result must be kept, otherwise the
        # records are walked in their original (unsorted) order.
        personRecords = sorted(row[1], key=lambda x: (x[6], x[2]))
        previous = "START"
        # NOTE(review): isTest() is called without an argument here, while
        # determineDictionary passes self.setSplit -- confirm this is intended.
        keepInModel = not utils.isTest()
        goldStandard = []
        for item in personRecords:
            # Transition: the state this record moves into.
            current = buildTransitionWrapper(item[5], item[4], item[2])
            # Emission: keyed by the (previous, current) state pair.
            emissionKey = previous + "_" + current
            patientAmount = item[3]
            if keepInModel:
                # Each yielded pair is a unit count for its key.
                yield ((previous, current, transitionType), 1)
                yield ((emissionKey, patientAmount, emissionType), 1)
            else:
                goldStandard.append(
                    (previous, current, emissionKey, patientAmount, item[4], item[5])
                )
            previous = current
        # Push the gold standard for this row when it is held out of the model.
        if not keepInModel:
            yield ((row[0][0], row[0][1], goldType), goldStandard)
def determineDictionary(self, test, train):
    """Choose between the test and train dictionaries for the current split.

    Returns ``(test, True)`` when ``utils.isTest(self.setSplit)`` is truthy,
    and ``(train, False)`` otherwise.
    """
    # Guard clause: anything that is not a test split uses the train dictionary.
    if not utils.isTest(self.setSplit):
        return (train, False)
    return (test, True)