Esempio n. 1
0
def create_perobject_class_table(classifier, classNames):
    '''
    Score every object and store the predictions in the per-object class table.

    classifier: generalclassifier object
    classNames: list/array of class names; predicted class number k is looked
        up as classNames[k - 1]
    RETURNS: nothing. Drops and recreates the table named by p.class_table
        with the object key columns plus class_number (the predicted class)
        and class (the class name), then fills it one where-clause batch at a
        time and commits.
    RAISES: ValueError if "class_table" is not set in the properties file, or
        if the object keys of a batch are not at least 2-dimensional.
    '''
    if p.class_table is None:
        raise ValueError('"class_table" in properties file is not set.')

    index_cols = UniqueObjectClause()
    class_cols = index_cols + ', class_number, class'
    # Size the name column to the longest class name (never narrower than the
    # previous fixed width of 3) so that longer names are not cut off.
    name_width = max([len(str(name)) for name in classNames] + [3])
    class_col_defs = object_key_defs() + ', class VARCHAR (%d)'%(name_width) + ', class_number INT'

    def build_case(key_names, keys, values):
        # One "WHEN <key match> THEN '<value>'" arm per scored object.
        arms = []
        for key, value in zip(keys, values):
            match = ' AND '.join('%s=%d'%(name, key[pos])
                                 for pos, name in enumerate(key_names))
            arms.append("WHEN %s THEN '%s'"%(match, value))
        return 'CASE ' + ' '.join(arms) + ' END'

    # The existing table is always replaced.
    print('Drop table...')
    db.execute('DROP TABLE IF EXISTS %s'%(p.class_table))
    print('Create table...')
    db.execute('CREATE TABLE %s (%s)'%(p.class_table, class_col_defs))
    print('Create index...')
    db.execute('CREATE INDEX idx_%s ON %s (%s)'%(p.class_table, p.class_table, index_cols))

    print('Getting data...')
    # These do not change between batches, so look them up once.
    key_cols = UniqueObjectClause(p.object_table)
    feature_cols = ",".join(db.GetColnamesForClassifier())
    wheres = _where_clauses(p, dm, None)
    for idx, where_clause in enumerate(wheres):
        data = db.execute('SELECT %s, %s FROM %s '
                          '%s WHERE %s'
                          %(key_cols, feature_cols, p.object_table, '', where_clause),
                          silent=(idx > 10))
        cell_data, object_keys = processData(data)
        if len(object_keys) == 0:
            # Nothing to score in this batch; do not reuse a previous batch's
            # CASE expressions.
            print(idx)
            continue

        predicted_classes = classifier.Predict(cell_data)
        # NOTE(review): for a 2-D key array len(shape) is always 2, so the
        # three-column branch looks unreachable; shape[1] > 2 may have been
        # intended. Left unchanged pending confirmation of what processData
        # returns.
        if len(object_keys.shape) > 2:
            key_names = ('TableNumber', 'ImageNumber', 'ObjectNumber')
        elif len(object_keys.shape) == 2:
            key_names = ('ImageNumber', 'ObjectNumber')
        else:
            raise ValueError('Unexpected object key shape: %s'%(object_keys.shape,))

        # Double single quotes so that a class name such as "don't know"
        # does not terminate the SQL string literal early.
        class_labels = [str(classNames[cls - 1]).replace("'", "''")
                        for cls in predicted_classes]
        expr = build_case(key_names, object_keys, predicted_classes)
        expr2 = build_case(key_names, object_keys, class_labels)

        # Restrict the insert to the rows of this batch. Without the WHERE,
        # every batch would insert every object row again, with NULL classes
        # for the rows that no CASE arm matches.
        db.execute('INSERT INTO %s (%s) SELECT %s, %s, %s FROM %s WHERE %s'
                   %(p.class_table, class_cols, index_cols, expr, expr2,
                     p.object_table, where_clause),
                   silent=True)
        print(idx)
    db.Commit()
Esempio n. 2
0
def FilterObjectsFromClassN(classNum, classifier, filterKeys, uncertain, threshold=0.1):
    '''
    Classify the selected objects and return the keys of those that match.

    uncertain: if true, ignore classNum and return the objects the classifier
        is uncertain about, i.e. those whose two largest class probabilities
        differ by less than threshold
    classNum: 1-based index of the class to retrieve obKeys from
    classifier: trained classifier object
    filterKeys: (optional) A list of specific imKeys OR obKeys (NOT BOTH)
        to classify, or a ready-made SQL where clause given as a string.
        None or an empty value selects every object.
        * WARNING: If this list is too long, you may exceed the size limit to
          MySQL queries.
        * Useful when fetching N objects from a particular class. Use the
          DataModel to get batches of random objects, and sift through them
          here until N objects of the desired class have been accumulated.
        * Also useful for classifying a specific image or group of images.
    threshold: largest probability gap that still counts as uncertain
        (only used when uncertain is true)
    RETURNS: A list of object keys (tuples) that fall in the specified class,
        or that are uncertain. No area sums are reported here.
    '''
    if filterKeys is None or len(filterKeys) == 0:
        whereclause = ''
    elif isinstance(filterKeys, str):
        whereclause = filterKeys
    elif len(filterKeys[0]) == len(image_key_columns()):
        whereclause = GetWhereClauseForImages(filterKeys)
    else:
        whereclause = GetWhereClauseForObjects(filterKeys)

    # The area scoring column is deliberately not selected: the value was
    # never used by this function, and the former area query could not even
    # be formatted (five arguments for four placeholders).
    query = 'SELECT %s, %s FROM %s'%(UniqueObjectClause(p.object_table),
                                     ",".join(db.GetColnamesForClassifier()),
                                     p.object_table)
    if whereclause:
        # Only add WHERE when there is a condition; a dangling "WHERE " is
        # invalid SQL.
        query += ' WHERE %s'%(whereclause)
    data = db.execute(query)

    cell_data, object_keys = processData(data)
    if len(object_keys) == 0:
        return []

    if uncertain:
        probabilities = classifier.PredictProba(cell_data)
        # Sort each row and keep its two largest probabilities.
        top_two = np.sort(probabilities)[:, -2:]
        margin = top_two[:, 1] - top_two[:, 0]
        selected = margin < threshold
    else:
        selected = np.asarray(classifier.Predict(cell_data)) == classNum

    # Return a real list of plain tuples on both paths (map() is lazy on
    # Python 3).
    return [tuple(key) for key in object_keys[selected].tolist()]
    def do_by_steps(tables, filter_name, area_score=False):
        '''
        Classify objects batch by batch and tally the results per image.

        tables: table expression placed in the FROM clause
        filter_name: name of an entry in p._filters, or None for no filter
        area_score: if true, also fetch p.area_scoring_column for every object
            and sum it up next to the object count
        RETURNS: dict mapping (first key column, predicted class) to a numpy
            array [object count], or [object count, area sum] when area_score
            is true.
        Uses classifier and cb from the enclosing scope; cb, when set, is
        called with the progress fraction after every batch.
        '''
        filter_clause = '1 = 1'
        join_clause = ''
        if filter_name is not None:
            filter_obj = p._filters[filter_name]
            if isinstance(filter_obj, cpa.sqltools.OldFilter):
                join_table = '(%s) as filter' % str(filter_obj)
            elif p.object_table in tables:
                # NOTE(review): in this case the filter is neither joined nor
                # added to the where clause -- confirm that is intended.
                join_table = None
            else:
                join_table = p.object_table
                filter_clause = str(filter_obj)
            if join_table:
                join_clause = 'JOIN %s USING (%s)' % (join_table, ','.join(
                    image_key_columns()))

        wheres = _where_clauses(p, dm, filter_name)
        num_clauses = len(wheres)
        counts = {}
        # The feature column list does not change between batches.
        feature_cols = ",".join(db.GetColnamesForClassifier())

        # iterate over where clauses to go through whole set
        for idx, where_clause in enumerate(wheres):
            where_clause += ' AND ' + filter_clause
            areas = None
            if area_score:
                data = db.execute('SELECT %s, %s, %s FROM %s '
                                  '%s WHERE %s' %
                                  (UniqueImageClause(p.object_table),
                                   feature_cols,
                                   _objectify(p, p.area_scoring_column),
                                   tables, join_clause, where_clause),
                                  silent=(idx > 10))
                # The area is the last column of every row. Split it off per
                # row and leave the area_score flag untouched, so later
                # batches still see the caller's boolean.
                areas = [row[-1] for row in data]
                data = [row[:-1] for row in data]
            else:
                data = db.execute('SELECT %s, %s FROM %s '
                                  '%s WHERE %s' %
                                  (UniqueObjectClause(p.object_table),
                                   feature_cols,
                                   tables, join_clause, where_clause),
                                  silent=(idx > 10))

            cell_data, image_keys = processData(data)
            if len(image_keys) > 0:
                # Diagnostic: report any feature value that is not numeric.
                for i in range(cell_data.shape[0]):
                    for j in range(cell_data.shape[1]):
                        try:
                            float(cell_data[i, j])
                        except (TypeError, ValueError):
                            print(i, j, cell_data[i, j], type(cell_data[i, j]))

                predicted_classes = classifier.Predict(cell_data)
                for i, predicted in enumerate(predicted_classes):
                    # Only the first key column identifies the image here.
                    # NOTE(review): with a table-number column in front this
                    # would be the table number -- confirm.
                    row_cls = tuple(np.append(image_keys[i][0], predicted))
                    increment = np.array([1])
                    if areas is not None:
                        increment = np.append(increment, areas[i])
                    if row_cls in counts:
                        # Not in-place, so that mixed int/float areas do not
                        # trip a casting error.
                        counts[row_cls] = counts[row_cls] + increment
                    else:
                        counts[row_cls] = increment

            if cb:
                cb(min(1, idx / float(num_clauses)))  #progress
        return counts