# Extract one feature array per movie poster with the pre-loaded ``model``.
#
# For every entry of ``poster_movies`` whose derived poster file name is listed
# in ``imnames``, the image is loaded at 224x224, preprocessed and passed to
# ``model.predict``.  On success the poster name, image path, feature array and
# genre ids are appended to ``succesful_files``, ``file_order``,
# ``feature_list`` and ``genre_list`` respectively; on any failure only
# ``failed_files`` is updated, so the four result lists stay index-aligned.
succesful_files = []
i = 0  # 1-based position in poster_movies; remains 0 when the input is empty
for i, mov in enumerate(poster_movies, start=1):
    mov_name = mov['original_title']
    # NOTE(review): mov_name1 is not used anywhere in the visible code; kept in
    # case a later part of the script reads it.
    mov_name1 = mov_name.replace(':', '/')
    poster_name = mov_name.replace(' ', '_') + '.jpg'
    if poster_name not in imnames:
        continue

    img_path = poster_folder + poster_name
    try:
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add a leading batch axis
        x = preprocess_input(x)
        features = model.predict(x)
        # Read the genres before touching any result list so that a missing
        # key cannot leave the lists with different lengths.
        genre_ids = mov['genre_ids']
    except Exception:
        # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit still
        # interrupt a long-running extraction.
        failed_files.append(poster_name)
    else:
        # Only record the poster as successful once everything above worked.
        succesful_files.append(poster_name)
        file_order.append(img_path)
        feature_list.append(features)
        genre_list.append(genre_ids)
        # Check only the array just produced: scanning the whole accumulated
        # list on every iteration is quadratic and stops flagging all-zero
        # outputs as soon as one non-zero array has been stored.
        if np.max(features) == 0.0:
            print('problematic', i)
        if i % 250 == 0 or i == 1:
            print("Working on Image : ", i)

print("Done with all features, please pickle for future use!")
print(len(genre_list))
# Example no. 2
# 0
    # Fill the label for the current row: when word[0] is a key of row_dict,
    # take the entry of model.row_labels_ at the index row_dict maps it to;
    # otherwise fall back to label 0.
    if word[0] in row_dict:
        y_train[row_index] = model.row_labels_[row_dict[word[0]]]
    else:
        y_train[row_index] = 0
    row_index += 1  # advance to the next slot of y_train
# Show the assembled samples and labels before training.
print(x_train)
print(y_train)

# Shuffle the samples with a fixed seed, then use the first 90% for training
# and the remainder for testing.
x, y = su.shuffle(x_train, y_train, random_state=7)
train_size = int(len(x) * 0.9)
train_x, train_y = x[:train_size], y[:train_size]
test_x, test_y = x[train_size:], y[train_size:]

# Fit a 100-estimator random forest classifier on the training split.
model = se.RandomForestClassifier(n_estimators=100).fit(train_x, train_y)

# Model testing: predict the held-out split and grab the feature importances.
pred_test_y = model.predict(test_x)
fi = model.feature_importances_
print(pred_test_y)
print(test_y)

# Model evaluation.
# NOTE(review): the printed label mentions bike_hour / r2_score, but the value
# computed here is accuracy_score -- confirm which label is intended.
print('bike_hour的r2_score得分:', accuracy_score(test_y, pred_test_y))
print(fi)

# End of processing.
print("--->")
print(len(row_dict))

# Write the first ``unsta_max`` columns of the first ``row_num`` rows of
# ``matrix`` to data.txt, one row per line.  Each value is followed by a
# single space (so every line ends with a trailing space), matching the
# original output format.  The ``with`` block guarantees the file is flushed
# and closed even if indexing ``matrix`` raises part-way through.
with open("data.txt", "w") as out_file:
    for i in range(row_num):
        for j in range(unsta_max):
            print(matrix[i][j], file=out_file, end=' ')
        print(file=out_file)