Exemple #1
0
def leave_one_out(db, json_data_s, feature_type, algorithm):
    """Run a leave-one-out evaluation of ``algorithm`` on ``feature_type``.

    Every sample that matches the request selector and has a
    ``ground_truth`` field is held out in turn: a classifier named
    ``__leave_one_out`` is trained with that sample excluded and is then
    used to predict the held-out sample. Once all samples are processed,
    ``mongointerface.evaluate`` is called once with the request data.

    Args:
        db: Database handle; ``db[feature_type]`` must provide ``find``.
        json_data_s: JSON-encoded request. ``data['selector']`` is read
            after ``init_data(data)`` has been applied.
        feature_type: Name of the collection that holds the samples.
        algorithm: Package name; ``<algorithm>.classifier`` is imported and
            its ``train`` and ``predict`` functions are called.

    Returns:
        The first train/predict/evaluate result whose ``status`` is not
        ``'success'``; otherwise the evaluation result with an ``event``
        entry of the form ``{'_id': 'leave_one_out::<clf_id>'}`` added.
    """
    print("function: leave_one_out")
    print(json_data_s)
    data = json.loads(json_data_s)
    init_data(data)

    leave_one_out_clf_name = "__leave_one_out"
    data['name'] = leave_one_out_clf_name

    # Restrict the evaluated samples to those that carry a ground truth.
    # The selector dict is shared with ``data``, so the final evaluate call
    # sees the same restriction.
    selector = data['selector']
    selector['ground_truth'] = {"$exists": True}
    collection = db[feature_type]
    samples = collection.find(selector)

    mod = __import__(algorithm + '.classifier', fromlist=['.'])

    for s in samples:
        print(s)
        _data = copy.deepcopy(data)
        # NOTE(review): the training selector only excludes the held-out
        # sample; it does not keep the caller's selector or the ground_truth
        # restriction. Confirm this is intended.
        _data['selector'] = {'_id': {'$ne': s['_id']}}
        _data['overwrite'] = True
        _data['name'] = leave_one_out_clf_name

        ## train ##
        print("train")
        result = mod.train(db, feature_type, _data)
        if result['status'] != 'success':
            return result

        ## predict ##
        print("predict")
        result = mod.predict(db, feature_type, Sample(s), _data)
        if result['status'] != 'success':
            return result

    ## evaluate ##
    # Function-call form: the Python 2 print statement is a SyntaxError on
    # Python 3 and was inconsistent with the other prints in this function.
    print("evaluate")
    result = mongointerface.evaluate(db, feature_type, data, algorithm)
    if result['status'] != 'success':
        return result
    clf_id = generate_clf_id(algorithm, feature_type, data)
    result['event'] = {'_id': "leave_one_out::" + clf_id}
    return result
Exemple #2
0
def route(db, json_data_s, operation, feature_type, algorithm=None):
    """Dispatch a JSON request to the handler that implements ``operation``.

    Handled operations:
        * ``train`` and ``predict`` are delegated to the ``train`` /
          ``predict`` functions of ``<algorithm>.classifier``.
        * ``clear_classifier``, ``clear_samples``, ``get_samples``,
          ``get_sample_detail``, ``band``, ``disband``, ``evaluate`` and
          ``add`` are delegated to ``mongointerface``.

    Args:
        db: Database handle; ``db[feature_type]`` is queried for samples.
        json_data_s: JSON-encoded request body.
        operation: Name of the operation to perform.
        feature_type: Name of the collection that holds the samples.
        algorithm: Package name of the classifier implementation. Needed
            for ``train``, ``predict`` and ``test``; also forwarded to
            ``clear_classifier`` and ``evaluate``.

    Returns:
        The result of the selected handler, or an ``error_json`` result when
        the algorithm is missing, the sample has no feature, or the
        operation is not handled.
    """
    print("function: route")
    print("operation => " + operation)
    print(json_data_s)
    data = json.loads(json_data_s)

    init_data(data)

    # For train, predict and test, import the module matching the algorithm.
    if operation in {'train', 'predict', 'test'}:
        if algorithm is None:
            # Report the problem in the usual result format instead of
            # failing with a TypeError on ``None + str`` below.
            return error_json(
                'Error: operation %s requires an algorithm.' % operation)
        mod = __import__(algorithm + '.classifier', fromlist=['.'])

    # Operations that do not need a Sample object.
    if operation == 'train':
        return mod.train(db, feature_type, data)
    if operation == 'clear_classifier':
        return mongointerface.clear_classifier(db, feature_type, data, algorithm)
    if operation == 'clear_samples':
        return mongointerface.clear_samples(db, feature_type, data)
    if operation == 'get_samples':
        return mongointerface.get_samples(db, feature_type, data)
    if operation == 'get_sample_detail':
        return mongointerface.get_sample_detail(db, feature_type, data)
    if operation == 'band':
        return mongointerface.band(db, feature_type, data)
    if operation == 'disband':
        return mongointerface.disband(db, feature_type, data)
    if operation == 'evaluate':
        return mongointerface.evaluate(db, feature_type, data, algorithm)

    # Everything else works on a sample built from the request data.
    # ``dict.has_key`` no longer exists on Python 3, hence the ``in`` test.
    if 'id' in data:
        if operation != "add":
            # Overlay the stored document on the request so that fields
            # already saved for this sample are available.
            stored = db[feature_type].find_one({'_id': data['id']})
            if stored:
                for key, value in stored.items():
                    data[key] = value
    else:
        # Generate a sample ID automatically. IDs may collide if several
        # samples are added at the same time.
        # NOTE(review): Cursor.count() is deprecated in recent PyMongo
        # releases; switch to count_documents({}) once the minimum
        # supported PyMongo version is confirmed.
        data['id'] = "sample_" + "%012d" % db[feature_type].find().count()

    sample = Sample(data)
    if not sample.ft:
        return error_json("sample must contain 'feature'")

    if operation == 'add':
        return mongointerface.add(db, feature_type, sample)
    if operation == 'predict':
        return mod.predict(db, feature_type, sample, data)

    # Unknown operations (error). 'test' also ends up here: its module is
    # imported above but no handler is dispatched for it.
    return error_json('Error: unknown operation %s.' % operation)
Exemple #3
0
def cross_validation(db, json_data_s, feature_type, algorithm, fold_num):
    """Run ``fold_num``-fold cross validation of ``algorithm``.

    Samples that match the request selector and have a ``ground_truth``
    field are randomly split into ``fold_num`` groups whose sizes differ by
    at most one. Each sample is tagged with its fold group name and with
    ``__cross_validation``. For every fold, a classifier named after the
    fold group is trained on the other folds, used to predict the samples of
    the held-out fold, and evaluated; the per-fold confusion matrices are
    merged with ``merge_confusion_matrix``.

    Args:
        db: Database handle; ``db[feature_type]`` must provide ``find`` and
            ``update_one``.
        json_data_s: JSON-encoded request. ``data['selector']`` is read
            after ``init_data(data)`` has been applied.
        feature_type: Name of the collection that holds the samples.
        algorithm: Package name; ``<algorithm>.classifier`` is imported and
            its ``train`` and ``predict`` functions are called.
        fold_num: Number of folds; must be a positive integer.

    Returns:
        An ``error_json`` result when ``fold_num`` is not positive or no
        sample matches; the first train/predict/evaluate result whose
        ``status`` is not ``'success'``; otherwise a ``success_json`` result
        carrying the merged ``confusion_matrix`` and an ``event`` entry of
        the form ``{'_id': 'cross_validation::<clf_id>'}``.
    """
    print("function: cross_validation")
    data = json.loads(json_data_s)
    init_data(data)

    # A non-positive fold count would otherwise fail later with a
    # ZeroDivisionError / IndexError after groups were already disbanded.
    if fold_num < 1:
        return error_json("ERROR: fold_num must be a positive integer.")

    cv_group_head = "__cross_validation"
    fold_groups = [generate_group_name(cv_group_head, i)
                   for i in range(fold_num)]

    # Disband all previously tagged cross-validation groups.
    for group_name in fold_groups:
        mongointerface.disband(db, feature_type, {'group': group_name})
    mongointerface.disband(db, feature_type, {'group': cv_group_head})

    collection = db[feature_type]
    sample_filter = data['selector']
    sample_filter['ground_truth'] = {"$exists": True}

    # Fetch the matching samples once. Indexing the cursor (samples[i])
    # issues a separate skip/limit query per sample, so the samples are
    # materialised here instead; only _id and group are needed for tagging.
    samples = list(collection.find(sample_filter, {'_id': 1, 'group': 1}))
    samples_count = len(samples)
    if samples_count == 0:
        return error_json("ERROR: no samples are hit.")

    # Build one group label per sample, then shuffle the labels so the
    # assignment of samples to folds is random.
    quotient, remainder = divmod(samples_count, fold_num)
    group_assignment = []
    for i, group_name in enumerate(fold_groups):
        # The first `remainder` folds take one extra sample.
        n = quotient + 1 if i < remainder else quotient
        print("group_count[%02d] = %d" % (i, n))
        group_assignment += [group_name] * n
    random.shuffle(group_assignment)

    # Tag every sample with its fold group and the common head group.
    for s, group_name in zip(samples, group_assignment):
        # Normalise before the membership test so a non-list value is not
        # tested with `in` directly (assumes ensure_list returns a list --
        # TODO confirm against mongointerface).
        groups = mongointerface.ensure_list(s['group'])
        if group_name not in groups:
            groups.append(group_name)
            groups.append(cv_group_head)
            collection.update_one({"_id": s['_id']},
                                  {"$set": {'group': groups}})

    mod = __import__(algorithm + '.classifier', fromlist=['.'])

    # Train, predict and evaluate one classifier per fold.
    confusion_matrices = []
    for exclude_group in fold_groups:
        ## train ##
        _data = copy.deepcopy(data)
        # Train on samples tagged for this run that are not in the
        # held-out fold.
        _data['selector'] = {
            'group': {'$not': {'$all': [exclude_group]},
                      '$all': [cv_group_head]},
            'ground_truth': {"$exists": True},
        }
        _data['overwrite'] = True
        _data['name'] = exclude_group
        result = mod.train(db, feature_type, _data)
        if result['status'] != 'success':
            return result

        ## predict ##
        held_out_selector = {'group': {'$all': [exclude_group]}}
        group_samples = mongointerface.get_training_samples(
            db, feature_type, False, held_out_selector)
        for s in group_samples:
            result = mod.predict(db, feature_type, Sample(s), _data)
            if result['status'] != 'success':
                return result

        ## evaluate ##
        _data['selector'] = held_out_selector
        result = mongointerface.evaluate(db, feature_type, _data, algorithm)
        if result['status'] != 'success':
            return result
        confusion_matrices.append(result['confusion_matrix'])

    # Merge the per-fold matrices (each one is a JSON string).
    cmat = None
    for m in confusion_matrices:
        fold_cmat = json.loads(m)
        cmat = merge_confusion_matrix(cmat, fold_cmat) if cmat else fold_cmat

    result = success_json()
    result['confusion_matrix'] = cmat
    clf_id = generate_clf_id(algorithm, feature_type, data)
    result['event'] = {'_id': "cross_validation::" + clf_id}
    return result