def clusters(dataSet, features, exemplars):
    ftrs = list(features)
    wI = wellIndex(dataSet)
    objectCount = len(wI)
    predicted = np.empty(objectCount, dtype=int)

    # Default to using all features if none have been selected.
    if not ftrs:
        ftrs = data.imageFeatures(dataSet)

    if ftrs and exemplars:
        # Training feature data; one row per object, one column per feature.
        valueMatrix = np.matrix([scaledArray(dataSet, ftr) for ftr in ftrs], copy=False).transpose()

        # Start from the well type annotation.
        trainingLabels = np.copy(wI['type'].values)

        # Knock out a large part of the training labels (to speed up training).
        trainingSample = np.random.rand(trainingLabels.size) < configuration(dataSet).wellTypeSample
        trainingLabels = np.where(trainingSample, trainingLabels, np.nan)

        # Override well type annotations where exemplars have been chosen by the user.
        exemplarDict = dict(exemplars)
        for popId, exemplarObjects in exemplarDict.iteritems():
            for exemplar in exemplarObjects:
                trainingLabels[exemplar] = popId

        # Prune training features and labels, based on presence of labels.
        labeled = ~np.isnan(trainingLabels)
        trainingValues = valueMatrix[labeled]
        trainingLabels = trainingLabels[labeled]

        print "Begin training"
        forest = RandomForestClassifier(
            n_estimators=10,
            n_jobs=-1,
            class_weight="balanced"
        )
        forest = forest.fit(trainingValues, trainingLabels)
        print "End training"

        print "Begin classification"
        # Only accept a prediction when the classifier is sufficiently confident;
        # fall back to label 2 ("unsure") otherwise.
        confidenceThreshold = configuration(dataSet).classifierConfidenceThreshold
        probabilities = forest.predict_proba(valueMatrix)
        maxProb = np.max(probabilities, axis=1)
        maxArgProb = np.argmax(probabilities, axis=1)
        predicted = np.where(maxProb > confidenceThreshold,
                             np.choose(maxArgProb, forest.classes_),
                             2).astype(int)
        print "End classification"
    else:
        # Mark all objects as 2 ("unsure") when no training input is provided.
        predicted.fill(2)

    # One predicted population label per object index.
    return predicted
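
# A minimal usage sketch of clusters(), assuming a hypothetical data set id
# "screen01" and hypothetical feature names; exemplars are passed as
# (populationId, [objectIndex, ...]) pairs chosen by the user. Only the
# fallback label 2 ("unsure") is taken from this module, everything else
# below is illustrative:
#
#   labels = clusters("screen01", ["cellArea", "nucleusIntensity"],
#                     [(3, [120, 4512]), (4, [87])])
#   # labels is a NumPy array with one entry per object in wellIndex("screen01");
#   # objects whose maximum class probability stays at or below
#   # classifierConfidenceThreshold receive the fallback label 2.
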
def objectInfo(dataSet, featureSet, column, row, plate, exemplars, probes):
    objects = allObjects(dataSet, column, row, plate, exemplars, probes)
    combined = wellIndex(dataSet).loc[objects].copy()

    # Feature values.
    for ftr in featureSet:
        combined[ftr] = np.take(scaledArray(dataSet, ftr), objects)

    # MDS coordinates, when present for this data set.
    if data.mdsColumnsPresent(dataSet):
        for mdsCol in data.mdsColumns:
            combined[mdsCol] = np.take(scaledArray(dataSet, mdsCol), objects)

    # Predicted population values.
    combined["population"] = np.take(clusters(dataSet, featureSet, exemplars), objects)

    # Generate well image URLs on the spot, based on the configured URL functions.
    wellImages = data.config(dataSet).wellImages
    for name, urlFunction in wellImages.iteritems():
        combined["img_" + name] = combined.apply(
            lambda rec: urlFunction(int(rec['plate']), int(rec['column']), int(rec['row'])),
            axis=1)

    return combined
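
# A minimal usage sketch of objectInfo(), with the same hypothetical data set
# and exemplars as above; column, row, plate, and probes determine which
# objects are selected via allObjects():
#
#   info = objectInfo("screen01", ["cellArea", "nucleusIntensity"],
#                     column=3, row=5, plate=1,
#                     exemplars=[(3, [120, 4512])], probes=[])
#   # info is a pandas DataFrame with one row per selected object, containing
#   # the well index columns, the scaled feature values, a "population" column
#   # with the classifier output, and one "img_<name>" URL column per entry
#   # in the configured wellImages.
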
def configuration(dataSet):
    return data.config(dataSet)