Example #1
def clusters(dataSet, features, exemplars):
    ftrs = list(features)

    wI = wellIndex(dataSet)
    objectCount = len(wI)
    predicted = np.empty(objectCount, dtype=np.int)

    # Default to using all features if none have been selected.
    if not ftrs:
        ftrs = data.imageFeatures(dataSet)

    if ftrs and exemplars:   #(exemplars or wellTypes(dataSet)):
        # Training feature data.
        valueMatrix = np.matrix([scaledArray(dataSet, ftr) for ftr in ftrs], copy=False).transpose()

        # Construct from well type annotation.
        trainingLabels = np.copy(wI['type'].values)

        # Knock out large part of training values (to speed up training).
        trainingSample = np.random.rand(trainingLabels.size) < configuration(dataSet).wellTypeSample
        trainingLabels = np.where(trainingSample, trainingLabels, np.nan)

        # Override well type annotations where exemplars have been chosen by user.
        exemplarDict = dict(exemplars)

        for popId, popExemplars in exemplarDict.iteritems():
            for exemplar in popExemplars:
                trainingLabels[exemplar] = popId

        # Prune training features and labels, based on presence of labels.
        trainingValues = valueMatrix[~np.isnan(trainingLabels)]
        trainingLabels = trainingLabels[~np.isnan(trainingLabels)]

        print "Begin training"
        #trainingValues = np.take(valueMatrix, exemplarObjects, axis=0)
        forest = RandomForestClassifier(
            n_estimators=10,
            n_jobs=-1,
            class_weight="balanced"#,
            #min_samples_split=0.01*trainingValues.size
        )
        forest = forest.fit(trainingValues, trainingLabels)    #forest.fit(trainingValues, exemplarLabels)
        print "End training"

        print "Begin classification"
        #predicted = forest.predict(valueMatrix)
        confidenceThreshold = data.config(dataSet).classifierConfidenceThreshold
        probabilities = forest.predict_proba(valueMatrix)
        maxProb = np.max(probabilities, axis=1)
        maxArgProb = np.argmax(probabilities, axis=1)
        predicted = np.where(maxProb > confidenceThreshold, np.choose(maxArgProb, forest.classes_), 2).astype(np.int)
        print "End classification"
    else:
        predicted.fill(2)   # Label 2 marks all objects as "unsure" when no training input is provided.

    # Partition predicted column to object indices.
    return predicted
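
The core pattern in clusters — fit a RandomForestClassifier on the labelled subset, then accept a prediction only when its class probability clears a confidence threshold and otherwise fall back to the "unsure" label 2 — can be reproduced in isolation. A minimal sketch with synthetic data; the threshold value, array shapes, and labels here are made up for illustration and do not come from the data set configuration:

import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Synthetic feature matrix and partial labels (NaN = unlabelled), for illustration only.
rng = np.random.RandomState(0)
valueMatrix = rng.rand(200, 3)
trainingLabels = np.where(rng.rand(200) < 0.2,
                          (valueMatrix[:, 0] > 0.5).astype(float),
                          np.nan)

# Train only on the rows that actually carry a label.
mask = ~np.isnan(trainingLabels)
forest = RandomForestClassifier(n_estimators=10, n_jobs=-1, class_weight="balanced")
forest.fit(valueMatrix[mask], trainingLabels[mask])

# Keep a prediction only when its probability clears the threshold; otherwise assign 2 ("unsure").
confidenceThreshold = 0.75   # assumed value; the real one comes from the data set configuration
probabilities = forest.predict_proba(valueMatrix)
maxProb = probabilities.max(axis=1)
predicted = np.where(maxProb > confidenceThreshold,
                     forest.classes_[probabilities.argmax(axis=1)],
                     2).astype(int)
print(predicted[:20])

Indexing forest.classes_ with the argmax positions is equivalent to the np.choose call used above; both map the winning probability column back to its class label.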
Example #2
def objectInfo(dataSet, featureSet, column, row, plate, exemplars, probes):
    objects = allObjects(dataSet, column, row, plate, exemplars, probes)
    combined = wellIndex(dataSet).loc[objects].copy()

    # Feature values.
    for ftr in featureSet:
        combined[ftr] = np.take(scaledArray(dataSet, ftr), objects)

    if data.mdsColumnsPresent(dataSet):
        for mdsCol in data.mdsColumns:
            combined[mdsCol] = np.take(scaledArray(dataSet, mdsCol), objects)

    # Predicted population values.
    combined["population"] = np.take(clusters(dataSet, featureSet, exemplars), objects)

    # Generate well URLs on the spot, based on config.
    wellImages = data.config(dataSet).wellImages
    for name, urlFunction in wellImages.iteritems():
        combined["img_" + name] = combined.apply(
            lambda row: urlFunction(int(row['plate']), int(row['column']), int(row['row'])), axis=1)

    return combined
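
The per-well image URLs in objectInfo are generated row by row with DataFrame.apply over the configured wellImages functions. A stand-alone sketch of that pattern; the table contents and the URL builder below are hypothetical stand-ins for the configured urlFunction:

import pandas as pd

# Toy object table; plate/column/row mimic the wellIndex columns used above.
combined = pd.DataFrame({"plate": [1, 1, 2], "column": [3, 4, 5], "row": [6, 7, 8]})

# Hypothetical URL builder standing in for one entry of the configured wellImages dict.
def urlFunction(plate, column, row):
    return "https://example.org/images/p%d/c%d/r%d.png" % (plate, column, row)

wellImages = {"bright": urlFunction}
for name, fn in wellImages.items():
    combined["img_" + name] = combined.apply(
        lambda r: fn(int(r["plate"]), int(r["column"]), int(r["row"])), axis=1)

print(combined)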
Example #3
def configuration(dataSet):
    return data.config(dataSet)