# Standard-library modules used below; project-local helpers (mongointerface,
# Sample, init_data, error_json, success_json, generate_clf_id,
# generate_group_name, merge_confusion_matrix) are assumed to be defined or
# imported elsewhere in this module.
import copy
import json
import random


def leave_one_out(db, json_data_s, feature_type, algorithm):
    print("function: leave_one_out")
    print(json_data_s)
    data = json.loads(json_data_s)
    init_data(data)

    leave_one_out_clf_name = "__leave_one_out"
    data['name'] = leave_one_out_clf_name

    collections = db[feature_type]
    selector = data['selector']
    data['selector']['ground_truth'] = {"$exists": True}
    samples = collections.find(selector)

    mod = __import__(algorithm + '.classifier', fromlist=['.'])

    for s in samples:
        ## train on every sample except the held-out one ##
        print(s)
        _data = copy.deepcopy(data)
        _data['selector'] = {'_id': {'$ne': s['_id']}}
        _data['overwrite'] = True
        _data['name'] = leave_one_out_clf_name
        print("train")
        result = mod.train(db, feature_type, _data)
        if result['status'] != 'success':
            return result

        ## predict the held-out sample ##
        print("predict")
        result = mod.predict(db, feature_type, Sample(s), _data)
        if result['status'] != 'success':
            return result

    ## evaluate once over all held-out predictions ##
    print("evaluate")
    result = mongointerface.evaluate(db, feature_type, data, algorithm)
    if result['status'] != 'success':
        return result

    clf_id = generate_clf_id(algorithm, feature_type, data)
    result['event'] = {'_id': "leave_one_out::" + clf_id}
    return result
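
# --------------------------------------------------------------------------- #
# Illustrative usage sketch (not exercised on import).  It shows how
# leave_one_out() could be driven from a pymongo connection; the database name
# 'ml_server', feature type 'image_feature', algorithm package 'svm', and the
# 'group' tag in the selector are hypothetical placeholders, not values
# required by this module.
def _demo_leave_one_out():
    from pymongo import MongoClient

    client = MongoClient('localhost', 27017)
    db = client['ml_server']                      # hypothetical database name
    request = json.dumps({
        # restrict the run to samples tagged with a hypothetical 'training' group
        'selector': {'group': {'$all': ['training']}},
    })
    result = leave_one_out(db, request, 'image_feature', 'svm')
    print(result['status'])
    print(result.get('event'))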
def route(db, json_data_s, operation, feature_type, algorithm=None):
    print("function: route")
    print("operation => " + operation)
    print(json_data_s)
    data = json.loads(json_data_s)
    init_data(data)

    # For train/predict/test, import the module that implements the algorithm.
    if operation in {'train', 'predict', 'test'}:
        mod = __import__(algorithm + '.classifier', fromlist=['.'])

    # train
    if operation == 'train':
        return mod.__dict__[operation](db, feature_type, data)
    # clear_classifier
    elif operation == 'clear_classifier':
        return mongointerface.clear_classifier(db, feature_type, data, algorithm)
    # clear_samples
    elif operation == 'clear_samples':
        return mongointerface.clear_samples(db, feature_type, data)
    # get samples / sample detail
    elif operation == 'get_samples':
        return mongointerface.get_samples(db, feature_type, data)
    elif operation == 'get_sample_detail':
        return mongointerface.get_sample_detail(db, feature_type, data)
    # band
    elif operation == 'band':
        return mongointerface.band(db, feature_type, data)
    # disband
    elif operation == 'disband':
        return mongointerface.disband(db, feature_type, data)
    # evaluate
    elif operation == 'evaluate':
        return mongointerface.evaluate(db, feature_type, data, algorithm)
    # operations that need a Sample object ('add', 'predict')
    else:
        if 'id' in data:
            if operation != "add":
                sample = db[feature_type].find_one({'_id': data['id']})
                if sample:
                    for k, v in sample.items():
                        data[k] = v
        # generate a sample ID automatically
        # (IDs can collide if several samples are added at once)
        else:
            data['id'] = "sample_" + "%012d" % db[feature_type].count_documents({})
        sample = Sample(data)
        if not sample.ft:
            return error_json("sample must contain 'feature'")
        # add
        if operation == 'add':
            return mongointerface.add(db, feature_type, sample)
        # predict
        elif operation == 'predict':
            return mod.__dict__[operation](db, feature_type, sample, data)

    # unknown operation (error)
    return error_json('Error: unknown operation %s.' % operation)
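
# --------------------------------------------------------------------------- #
# Illustrative usage sketch for route(): registering a sample and then
# classifying it.  The feature vector, sample id, database name, and algorithm
# package name ('svm') are hypothetical; the JSON payloads only illustrate the
# fields the dispatcher above reads ('id', 'feature', 'ground_truth', ...).
def _demo_route():
    from pymongo import MongoClient

    db = MongoClient('localhost', 27017)['ml_server']   # hypothetical DB name

    # 'add' stores a sample document built from the request payload
    add_request = json.dumps({
        'id': 'sample_000000000001',
        'feature': [0.1, 0.2, 0.3],          # hypothetical feature vector
        'ground_truth': 'cat',               # optional label used for training
    })
    print(route(db, add_request, 'add', 'image_feature'))

    # 'predict' needs the algorithm so its classifier module can be imported
    predict_request = json.dumps({'feature': [0.1, 0.25, 0.31]})
    print(route(db, predict_request, 'predict', 'image_feature', algorithm='svm'))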
def cross_validation(db, json_data_s, feature_type, algorithm, fold_num):
    print("function: cross_validation")
    data = json.loads(json_data_s)
    init_data(data)
    cv_group_head = "__cross_validation"

    # disband all previously tagged cross-validation groups
    for i in range(0, fold_num):
        group_name = generate_group_name(cv_group_head, i)
        mongointerface.disband(db, feature_type, {'group': group_name})
    mongointerface.disband(db, feature_type, {'group': cv_group_head})

    collections = db[feature_type]
    selector = data['selector']
    data['selector']['ground_truth'] = {"$exists": True}
    # materialise the cursor so the samples can be counted and indexed below
    samples = list(collections.find(selector))

    # split the samples into N groups of (nearly) equal size, at random
    samples_count = len(samples)
    if samples_count == 0:
        return error_json("ERROR: no samples are hit.")
    group_assignment = []
    remainder = samples_count % fold_num
    quotient = int(samples_count / fold_num)
    for i in range(0, fold_num):
        n = quotient
        if i < remainder:
            n = n + 1
        print("group_count[%02d] = %d" % (i, n))
        group_assignment += [generate_group_name(cv_group_head, i)] * n
    random.shuffle(group_assignment)

    # tag each sample with its assigned group
    for i in range(samples_count):
        s = samples[i]
        group_name = group_assignment[i]
        groups = s['group']
        if group_name not in groups:
            groups = mongointerface.ensure_list(groups)
            groups.append(group_name)
            groups.append(cv_group_head)
            _id = s['_id']
            collections.update_one({"_id": _id}, {"$set": {'group': groups}})

    mod = __import__(algorithm + '.classifier', fromlist=['.'])

    # train, predict, and evaluate one classifier per fold
    confusion_matrices = []
    for i in range(0, fold_num):
        ## train on every group except the held-out one ##
        exclude_group = generate_group_name(cv_group_head, i)
        _data = copy.deepcopy(data)
        _data['selector'] = {'group': {'$not': {'$all': [exclude_group]},
                                       '$all': [cv_group_head]},
                             'ground_truth': {"$exists": True}}
        _data['overwrite'] = True
        _data['name'] = exclude_group
        result = mod.train(db, feature_type, _data)
        if result['status'] != 'success':
            return result

        ## predict every sample in the held-out group ##
        selector = {'group': {'$all': [exclude_group]}}
        group_samples = mongointerface.get_training_samples(db, feature_type, False, selector)
        for s in group_samples:
            result = mod.predict(db, feature_type, Sample(s), _data)
            if result['status'] != 'success':
                return result
        _data['selector'] = selector

        ## evaluate the held-out group ##
        result = mongointerface.evaluate(db, feature_type, _data, algorithm)
        if result['status'] != 'success':
            return result
        confusion_matrices.append(result['confusion_matrix'])

    # merge the per-fold confusion matrices into one
    cmat = None
    for m in confusion_matrices:
        if bool(cmat):
            cmat = merge_confusion_matrix(cmat, json.loads(m))
        else:
            cmat = json.loads(m)

    result = success_json()
    result['confusion_matrix'] = cmat
    clf_id = generate_clf_id(algorithm, feature_type, data)
    result['event'] = {'_id': "cross_validation::" + clf_id}
    return result
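
# --------------------------------------------------------------------------- #
# Illustrative usage sketch for cross_validation(): a 5-fold run over every
# sample that has a ground_truth label.  Database, feature type, and algorithm
# names are hypothetical placeholders, as in the sketches above.
def _demo_cross_validation():
    from pymongo import MongoClient

    db = MongoClient('localhost', 27017)['ml_server']   # hypothetical DB name
    request = json.dumps({'selector': {}})  # evaluate all labelled samples
    result = cross_validation(db, request, 'image_feature', 'svm', 5)
    if result['status'] == 'success':
        print(result['confusion_matrix'])
    else:
        print(result)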