# Extract one feature vector per movie poster found on disk.
#
# Reads (defined elsewhere in the file): poster_movies, imnames,
# poster_folder, image, preprocess_input, model, np.
# Appends to (defined elsewhere): file_order, feature_list, genre_list,
# failed_files.  Those first three are parallel lists: entry k of each one
# describes the same poster, so they are only ever extended together.
succesful_files = []  # (sic) spelling kept; other code may read this name
i = 0  # stays 0 when poster_movies is empty, as in the original counter
for i, mov in enumerate(poster_movies, start=1):
    mov_name = mov['original_title']
    poster_name = mov_name.replace(' ', '_') + '.jpg'
    if poster_name not in imnames:
        # No poster file for this movie; nothing to extract.
        continue

    img_path = poster_folder + poster_name
    try:
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add a leading batch axis
        x = preprocess_input(x)
        features = model.predict(x)
        # Look the genres up *before* touching any result list so a missing
        # key cannot leave the parallel lists with different lengths.
        genre_ids = mov['genre_ids']
    except Exception:
        # Best effort: remember the poster that could not be processed and
        # move on.  `Exception` (not a bare except) lets Ctrl-C through.
        failed_files.append(poster_name)
        continue

    # Everything succeeded: record the poster in all lists at once.
    succesful_files.append(poster_name)
    file_order.append(img_path)
    feature_list.append(features)
    genre_list.append(genre_ids)

    # Flag posters whose own feature maximum is exactly zero.  Only the
    # current result is inspected, so the check is O(1) per poster and is
    # not masked by earlier non-zero features.
    if np.max(features) == 0.0:
        print('problematic', i)
    if i % 250 == 0 or i == 1:
        print("Working on Image : ", i)

print("Done with all features, please pickle for future use!")
print(len(genre_list))
# NOTE(review): the next five lines read `word` and `row_index`, so they are
# presumably the body of a loop whose header lies above this chunk.  They are
# left at column 0 here; re-indent them under that header when merging.
# Rows whose key is missing from row_dict get label 0; the others take the
# label stored in model.row_labels_ at the index row_dict gives for the key.
if word[0] not in row_dict:
    y_train[row_index] = 0
else:
    y_train[row_index] = model.row_labels_[row_dict[word[0]]]
row_index += 1
# --- end of the presumed loop body ---

print(x_train)
print(y_train)

# Shuffle the samples (fixed seed), then split 90% train / 10% test.
x, y = su.shuffle(x_train, y_train, random_state=7)
train_size = int(len(x) * 0.9)
train_x, test_x = x[:train_size], x[train_size:]
train_y, test_y = y[:train_size], y[train_size:]

# `model` is rebound here: from this point on it is the random-forest
# classifier, no longer the object whose row_labels_ was read above.
model = se.RandomForestClassifier(n_estimators=100)
model = model.fit(train_x, train_y)

# Model testing.
pred_test_y = model.predict(test_x)
fi = model.feature_importances_
print(pred_test_y)
print(test_y)

# Model evaluation.
# NOTE(review): the label below mentions "bike_hour" and "r2_score", but the
# value printed is accuracy_score.  The string is left untouched in case the
# output is parsed elsewhere; consider correcting the label.
print('bike_hour的r2_score得分:', accuracy_score(test_y, pred_test_y))
print(fi)

# process end
print("--->")
print(len(row_dict))

# Dump the first row_num x unsta_max cells of `matrix` as text: one row per
# line, every value followed by a single space.  The `with` block guarantees
# the file is flushed and closed, which the original never did.
with open("data.txt", "w") as out_file:
    for i in range(row_num):
        for j in range(unsta_max):
            print(matrix[i][j], file=out_file, end=' ')
        print(file=out_file)