),), #('scaler' ,StandardScaler(copy=True,with_mean=False,with_std=True)), ('scaler' , MaxAbsScaler(copy=True)), ('classifier', SGDClassifier(verbose=0, class_weight='balanced', random_state=randomSeeds['randForClassifier']) ), ] ) parameters={'vectorizer__ngram_range':[(1,2)], 'vectorizer__min_df':[2], 'vectorizer__max_df':[.98], 'classifier__alpha':[1], 'classifier__learning_rate':['invscaling'], 'classifier__eta0':[ .01], 'classifier__loss':[ 'hinge' ], 'classifier__penalty':['l2'], } p = tl.TextPipelineTuningHelper( pipeline, parameters, trainingDataDir=args.trainingData, testSplit=args.testSplit, gridSearchBeta=args.gridSearchBeta, gridSearchCV=args.gridSearchCV, indexOfYes=args.indexOfYes, randomSeeds=randomSeeds, ).fit() print p.getReports(wIndex=args.wIndex, tuningIndexFile=args.tuningIndexFile, wPredictions=args.wPredictions, predFilePrefix=args.predFilePrefix, compareBeta=args.compareBeta, verbose=args.verbose, )
lowercase=False, # done in preprocessing stop_words='english', # token_pattern=r'\b([a-z_]\w+)\b', # use default for now ), ), ('scaler', StandardScaler(copy=True, with_mean=False, with_std=True)), #('scaler' , MaxAbsScaler(copy=True)), ('classifier', SGDClassifier(verbose=0, random_state=randomSeeds['randForClassifier'])), ]) parameters = { 'vectorizer__ngram_range': [(1, 2)], 'vectorizer__min_df': [0.1], 'vectorizer__max_df': [.7], #'vectorizer__max_features':[2000], 'classifier__loss': ['hinge'], 'classifier__penalty': ['l2'], 'classifier__alpha': [5], 'classifier__learning_rate': ['optimal'], 'classifier__class_weight': ['balanced'], 'classifier__eta0': [.01], } p = tl.TextPipelineTuningHelper( pipeline, parameters, beta=4, cv=2, randomSeeds=randomSeeds, ).fit() print p.getReports()
verbose=1, random_state=randomSeeds['randForClassifier'], learning_rate=0.05, n_estimators=1600, max_depth=3, min_samples_split=600, min_samples_leaf=150, max_features=0.7, subsample=0.85, )), ]) parameters = { # 'classifier__learning_rate': [0.025], # 'classifier__n_estimators': [3200,], # 'classifier__max_depth': [3, 6, 9 ], # 'classifier__min_samples_split': [525, 550, 575, 600, 625, ], # 'classifier__min_samples_leaf': [100, 125, 150, 175], # 'classifier__max_features': [0.6, 0.65, 0.7, 0.75, 0.8, 0.9, None, ], # 'classifier__subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0,], } note = '\n'.join([ "blessed GB.", ]) + '\n' p = tl.TextPipelineTuningHelper( pipeline, parameters, randomSeeds=randomSeeds, note=note, ).fit() print(p.getReports())