# Example #1
# 0
def trainTwoWords(trainHSV=False):
    """Train random-forest regressors mapping two word vectors to a colour model.

    Parameters
    ----------
    trainHSV : bool, optional
        When False (default), fit one RandomForestRegressor on the full
        target vector.  When True, fit four separate regressors — one per
        'zone' of the model (initial, hue, saturation, value) — each fed
        only the corresponding slices of the concatenated input vectors,
        and stitch their predictions back together.

    Returns
    -------
    tuple
        (test_data, test_data_results, test_labels, test_targets, clf)
        where clf is a single fitted regressor (trainHSV=False) or a list
        of the four zone regressors (trainHSV=True).
    """
    print("Gathering labels...")
    labels = getWordLists(method=sortAvail, checkRedundancy=False)
    shuffle(labels)
    training = []
    targets = []
    print("Converting to vectors...")
    for x, y, z in labels:
        # Input = concatenation of the two word vectors; target = colour vector.
        training.append(convertToVector(x) + convertToVector(y))
        targets.append(convertToVector(z, True))
    # 70/30 train/test split.  Floor division keeps the slice index an int
    # under Python 3 (plain / would yield a float and break slicing).
    split = len(training) * 7 // 10
    training_data = training[:split]
    training_targets = targets[:split]
    print(len(training_data))
    print(len(training_data[0]))
    # The test portion is simply the remaining 30% of the shuffled lists.
    # (A membership test like "x not in training_data" would be O(n^2) and
    # would wrongly drop test samples whose vectors duplicate a training one.)
    test_data = training[split:]
    test_targets = targets[split:]
    test_labels = [x[2] for x in labels[split:]]
    print(len(test_data))
    print("Fitting data...")
    if not trainHSV:
        clf = RandomForestRegressor(n_jobs=-1)
        clf.fit(training_data, training_targets)
        print("Predicting distributions...")
        test_data_results = clf.predict(test_data)
    else:
        # Go through everything and make a separate tree for each 'zone' in
        # the model.  The slice boundaries below define each zone's inputs
        # (slices of both word vectors) and outputs (slice of the target).
        print("Fitting Initial....")
        initial_clf = RandomForestRegressor(n_jobs=-1)
        # TODO: replace this with something else later
        initial_clf.fit(training_data, [x[0:3] for x in training_targets])
        hue_clf = RandomForestRegressor(n_jobs=-1)
        print("Fitting Hue...")
        hue_clf.fit([x[2:14] + x[40:52] for x in training_data],
                    [x[3:15] for x in training_targets])
        saturation_clf = RandomForestRegressor(n_jobs=-1)
        print("Fitting Saturation...")
        saturation_clf.fit([x[14:26] + x[52:64] for x in training_data],
                           [x[15:27] for x in training_targets])
        value_clf = RandomForestRegressor(n_jobs=-1)
        print("Fitting Value...")
        value_clf.fit([x[26:38] + x[64:76] for x in training_data],
                      [x[27:39] for x in training_targets])
        clf = [initial_clf, hue_clf, saturation_clf, value_clf]
        print("Predicting distributions...")
        initial_results = initial_clf.predict(test_data)
        hue_results = hue_clf.predict(
            [x[2:14] + x[40:52] for x in test_data])
        saturation_results = saturation_clf.predict(
            [x[14:26] + x[52:64] for x in test_data])
        value_results = value_clf.predict(
            [x[26:38] + x[64:76] for x in test_data])
        # Concatenate the four zone predictions back into full target rows,
        # in the same order the target vector was sliced above.
        test_data_results = [
            np.hstack((w, x, y, z))
            for w, x, y, z in zip(initial_results, hue_results,
                                  saturation_results, value_results)
        ]
    return test_data, test_data_results, test_labels, test_targets, clf
        # NOTE(review): this chunk opens mid-way through a regressor
        # constructor call (presumably `regr = RandomForestRegressor(` or
        # similar, given `regr.fit`/`regr.predict` below) — the opening
        # line is not visible here; confirm against the full file.
        max_features='auto',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        # NOTE(review): min_impurity_split is deprecated/removed in recent
        # scikit-learn releases — verify the pinned sklearn version.
        min_impurity_split=None,
        min_samples_leaf=3,
        min_samples_split=5,
        min_weight_fraction_leaf=0.0,
        n_estimators=400,
        n_jobs=-1,
        oob_score=False,
        random_state=None,
        verbose=0,
        warm_start=False,
    )
    # Fit on this split's training data and keep the fitted model
    # (base_model presumably collects one model per fold/iteration —
    # confirm against the enclosing loop, which is above this chunk).
    regr.fit(X_train_train, y_train_train)
    base_model.append(regr)
    # Predict on the held-out validation split and accumulate the
    # (y_val, y_pred) pairs across iterations in df_result.
    y_pred = regr.predict(X_train_val)
    df999 = pd.DataFrame({"y_val": y_train_val, "y_pred": y_pred})
    df_result = pd.concat([df_result, df999], axis=0)
    # Compute evaluation metrics on the accumulated validation data.
    y_val = df_result["y_val"]
    y_pred = df_result["y_pred"]
    mse = mean_squared_error(y_val, y_pred)
    mae = mean_absolute_error(y_val, y_pred)
    # Report MSE/RMSE/MAE on the accumulated validation predictions, plus
    # regr.score on (X_train, y_train); `i` is presumably the fold index.
    print(
        "**** Training set score( {} ):  MSE={:.3f}  RMSE={:.3f}  MAE={:.3f}  Score={:.3f} ****"
        .format(i, round(mse, 3), round(np.sqrt(mse), 3), round(mae, 3),
                regr.score(X_train, y_train)))

# In[50]: