Ejemplo n.º 1
0
                        "make a random prediction)")

    args = parser.parse_args()

    #==============================================================================
    # Load the data sets and build fingerprint features
    #==============================================================================
    # Seed constant; it is not referenced in this part of the script.
    SEED = 2018

    # Load training data
    LS = load_from_csv(args.ls)
    # Load test data. This is done exactly once: the same object feeds the
    # test fingerprints below, so re-reading the CSV a second time is unnecessary.
    TS = load_from_csv(args.ts)

    # Training inputs come from the SMILES column (fingerprint creation is
    # timed); training outputs are the values of the ACTIVE column.
    with measure_time("Creating fingerprint"):
        X_train = create_fingerprints(LS["SMILES"].values)
    y_train = LS["ACTIVE"].values

    # Test inputs, built from the SMILES column of the test set loaded above.
    X_test = create_fingerprints(TS["SMILES"].values)

    #==============================================================================
    # Define Base (level 0) and Stacking (level 1) estimators
    #==============================================================================
    base_clf = [
        RandomForestClassifier(n_estimators=3100,
                               bootstrap=True,
                               max_depth=None,
                               class_weight='balanced_subsample'),
        MLPClassifier(random_state=42,
                      alpha=1e-5,
Ejemplo n.º 2
0
                        help="Use a decision tree classifier (by default, "
                             "make a random prediction)")

    args = parser.parse_args()

    # Read the learning set (LS) and the test set (TS) from the paths given
    # on the command line.
    LS = load_from_csv(args.ls)
    TS = load_from_csv(args.ts)

    # -------------------------- Model --------------------------- #

    # LEARNING
    # X_LS: fingerprints created from the SMILES column (creation is timed).
    # y_LS: values of the ACTIVE column.
    with measure_time("Creating fingerprint"):
        X_LS = create_fingerprints(LS["SMILES"].values)
    y_LS = LS["ACTIVE"].values

    # Hyper-parameter grid for the cross-validated search. Any estimator
    # hyper-parameter could be listed here; the single dict is currently empty,
    # so the search adds no overrides to the settings fixed on the estimator.
    tuned_parameters = [{}]
    scores = ['roc_auc']

    for score in scores:
        # See the GridSearchCV documentation for the available options.
        knn = KNeighborsClassifier(n_neighbors=53,
                                   algorithm='auto',
                                   weights='distance')
        clf = GridSearchCV(knn,
                           tuned_parameters,
                           cv=2,
                           scoring=score,
                           n_jobs=-1,
                           verbose=10)
        clf.fit(X_LS, y_LS)
    # Rows of the learning set whose ACTIVE value equals 1.
    minority = LS[LS.ACTIVE == 1]

    # NOTE(review): `majority` is not assigned anywhere in the visible part of
    # this script; it has to exist before this point -- confirm where it is
    # defined.
    n_majority = len(majority)

    # Upsample the minority rows: draw with replacement until there are as many
    # rows as in `majority`, using a fixed random_state.
    minority_upsampled = resample(minority,
                                  replace=True,
                                  n_samples=n_majority,
                                  random_state=0)

    # Concatenate the majority rows with the upsampled minority rows.
    LS_upsampled = pd.concat([majority, minority_upsampled])

    # -------------------------- Model --------------------------- #

    # LEARNING
    # X_LS: fingerprints created from the SMILES column of the upsampled set
    # (creation is timed). y_LS: values of its ACTIVE column.
    with measure_time("Creating fingerprint"):
        X_LS = create_fingerprints(LS_upsampled["SMILES"].values)
    y_LS = LS_upsampled["ACTIVE"].values

    # Hyper-parameter grid for the cross-validated search. Any estimator
    # hyper-parameter could be listed here; the single dict is currently empty.
    tuned_parameters = [{}]
    scores = ['roc_auc']

    for score in scores:

        # Chercher GridSearchCV dans documentation
        clf = GridSearchCV(RandomForestClassifier(
            n_estimators=3100,
            bootstrap=True,
            max_depth=None,
            class_weight='balanced_subsample',