def Bootstrap_cv(estimator1, estimator2, X, y, score_func, cv=None, n_jobs=1,
                 verbose=0, ratio=.5):
    """Cross-validate a pair of estimators over the folds of ``cv``.

    For every train/test split, clones of ``estimator1`` and ``estimator2``
    are handed to ``dual_cross_val_score`` (defined elsewhere in this
    project) together with ``score_func`` and the mixing ``ratio``.

    Parameters mirror sklearn's old ``cross_val_score``; ``ratio`` is the
    weight passed through to ``dual_cross_val_score``.

    Returns a numpy array with one entry per fold.

    Raises TypeError when ``score_func`` is None and either estimator lacks
    a ``score`` method.
    """
    X, y = cross_validation.check_arrays(X, y, sparse_format='csr')
    cv = cross_validation.check_cv(
        cv, X, y, classifier=cross_validation.is_classifier(estimator1))
    if score_func is None:
        # BUG FIX: the original always blamed estimator1 in the error
        # message, even when estimator2 was the one missing 'score'.
        # Report the estimator that actually lacks the method.
        for est in (estimator1, estimator2):
            if not hasattr(est, 'score'):
                raise TypeError(
                    "If no score_func is specified, the estimator passed "
                    "should have a 'score' method. The estimator %s "
                    "does not." % est)
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    scores = cross_validation.Parallel(n_jobs=n_jobs, verbose=verbose)(
        cross_validation.delayed(dual_cross_val_score)(
            cross_validation.clone(estimator1),
            cross_validation.clone(estimator2),
            X, y, score_func, train, test, verbose, ratio)
        for train, test in cv)
    return np.array(scores)
def cross_val_score(estimator, X, y=None, score_func=None, cv=None, n_jobs=-1,
                    verbose=0, as_dvalues=False):
    """Evaluate a score by cross-validation.

    Replacement of :func:`sklearn.cross_validation.cross_val_score`, used to
    support computation of decision values.
    """
    X, y = check_arrays(X, y, sparse_format='csr')
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))
    # Without an explicit score_func we must be able to fall back on the
    # estimator's own scoring method.
    if score_func is None and not hasattr(estimator, 'score'):
        raise TypeError(
            "If no score_func is specified, the estimator passed "
            "should have a 'score' method. The estimator %s "
            "does not." % estimator)
    # Clone per fold so each fit is independent and pickle-able.
    runner = Parallel(n_jobs=n_jobs, verbose=verbose)
    fold_tasks = (
        delayed(_cross_val_score)(clone(estimator), X, y, score_func,
                                  train, test, verbose, as_dvalues)
        for train, test in cv)
    return np.array(runner(fold_tasks))
def dynamic_cross_val_predict(estimator, fv, esa_feature_list, unigram_feature_list, dynamic_X, y=None, cv=None, verbose=0, fit_params=None): print "dynamic predict cross val mit %s" % esa_feature_list + unigram_feature_list vec = DictVectorizer() tfidf = TfidfTransformer() X = vec.fit_transform(fv).toarray() # X = tfidf.fit_transform(X).toarray() X, y = cross_validation.indexable(X, y) cv = cross_validation.check_cv(cv, X, y, classifier=cross_validation.is_classifier(estimator)) preds_blocks = [] cross_val_step = 0 for train, test in cv: fv_copy = copy.deepcopy(fv) #baue X in jedem Schritt neu for i in range(0,len(fv)): #jedes i steht für einen featuredict feature_dict = fv_copy[i] dynamic_vec = dynamic_X[cross_val_step] #zeigt auf esa_vec for feature in esa_feature_list: feature_dict.update(dynamic_vec[find_index_for_dynamic_feature(feature)][i]) #das i-te feature-dict mit esa-feature updaten for feature in unigram_feature_list: feature_dict.update(dynamic_vec[find_index_for_dynamic_feature(feature)][i]) #das i-te feature-dict mit esa-feature updaten X = vec.fit_transform(fv_copy).toarray() # X = tfidf.fit_transform(X).toarray() preds_blocks.append(cross_validation._fit_and_predict(cross_validation.clone(estimator), X, y, train, test, verbose, fit_params)) cross_val_step+=1 preds = [p for p, _ in preds_blocks] locs = np.concatenate([loc for _, loc in preds_blocks]) if not cross_validation._check_is_partition(locs, cross_validation._num_samples(X)): raise ValueError('cross_val_predict only works for partitions') inv_locs = np.empty(len(locs), dtype=int) inv_locs[locs] = np.arange(len(locs)) # Check for sparse predictions if sp.issparse(preds[0]): preds = sp.vstack(preds, format=preds[0].format) else: preds = np.concatenate(preds) return preds[inv_locs]
def dynamic_cross_val_score(estimator, fv, esa_feature_list, unigram_feature_list, dynamic_X, y=None, scoring=None, cv=None, verbose=0, fit_params=None): print "dynamic cross val mit %s" % esa_feature_list + unigram_feature_list vec = DictVectorizer() tfidf = TfidfTransformer() X = vec.fit_transform(fv).toarray() # X= tfidf.fit_transform(X).toarray() X, y = cross_validation.indexable(X, y) cv = cross_validation.check_cv(cv, X, y, classifier=cross_validation.is_classifier(estimator)) scorer = cross_validation.check_scoring(estimator, scoring=scoring) scores = [] cross_val_step = 0 for train, test in cv: fv_copy = copy.deepcopy(fv) #baue X in jedem Schritt neu for i in range(0,len(fv)): #jedes i steht für einen featuredict feature_dict = fv_copy[i] dynamic_vec = dynamic_X[cross_val_step] #zeigt auf esa_vec for feature in esa_feature_list: feature_dict.update(dynamic_vec[find_index_for_dynamic_feature(feature)][i]) #das i-te feature-dict mit esa-feature updaten for feature in unigram_feature_list: feature_dict.update(dynamic_vec[find_index_for_dynamic_feature(feature)][i]) #das i-te feature-dict mit esa-feature updaten X = vec.fit_transform(fv_copy).toarray() # X = tfidf.fit_transform(X).toarray() scores.append(cross_validation._fit_and_score(cross_validation.clone(estimator), X, y, scorer, train, test, verbose, None, fit_params)) cross_val_step += 1 return np.array(scores)[:, 0]
def Scaled(algorithm):
    """Create a pipelined algorithm that performs feature scaling."""
    scaling_step = ('scaler', StandardScaler())
    learning_step = ('learner', clone(algorithm))
    return Pipeline([scaling_step, learning_step])
# Bootstrap-ensemble evaluation script: fit an ExtraTrees+logit pipeline on
# bootstrapped resamples of train_data and report the mean accuracy.
# Relies on names defined earlier in the file: train_data, test_data,
# extra_forest, logit, and the score list.
ratio = .2          # unused below; presumably a leftover blend weight — see commented-out 'output' lines
estimators = 20     # number of bootstrap resamples
train_size = .7     # fraction of samples per bootstrap training split
#output = ratio*forest.predict(test_data) + (1-ratio)*logit.predict(test_data)
#output = extra_forest.predict(test_data)
#Get bootstrapped data
bs = cross_validation.Bootstrap(train_data.shape[0], n_bootstraps=estimators,
                                train_size=train_size, random_state=0)
# train_data layout: column 0 is the label, columns 1.. are the features.
cv = cross_validation.check_cv(bs, train_data[0::,1::], train_data[0::,0],
                               classifier=cross_validation.is_classifier(extra_forest))
for train, test in cv:
    #Create training data
    X = train_data[0::,1::]
    y = train_data[0::,0]
    #Create estimator
    # Fresh clones per resample so fits stay independent.
    ef = cross_validation.clone(extra_forest)
    lgi = cross_validation.clone(logit)
    # NOTE(review): a Pipeline chains 'ef' as a transformer before 'logit' —
    # confirm extra_forest supports transform(), or this fit will fail.
    est = Pipeline([('ef', ef), ('logit', lgi)])
    est.fit(X[train], y[train])
    #print est.feature_importances_
    # NOTE(review): 'score' is never initialized as a list in this chunk —
    # verify `score = []` exists earlier in the file.
    score.append(est.score(X[test], y[test]))
#Format output
score = np.array(score)
# Predictions come from the estimator fitted on the LAST resample only.
output = est.predict(test_data)
#Score
print score
print score.mean()
print "EF+Logit Accuracy: %0.2f (+/- %0.2f)" % (score.mean(), score.std() / 2)