def main():
    """Train (or resume) a logistic-regression impact classifier and report results.

    Loads a previously saved model from MODEL_FILE when one exists, otherwise
    starts from a fresh LogisticRegression. Ensures the feature CSV exists
    (building it via feature_extraction when missing), labels rows by whether
    their impact is at/above the mean, fits on a random train/test split,
    prints a classification report, and persists the fitted model.
    """
    if os.path.exists(MODEL_FILE):
        logistic = joblib.load(MODEL_FILE)
    else:
        logistic = LogisticRegression()

    if not os.path.exists(DATA_FILE):
        feature_extraction.main('blog post', 'glamour')

    df = pd.read_csv(DATA_FILE)
    df = df.fillna(0)
    print(df.shape)

    # TODO (@messiest) y ~ 1 if y in top 25%, 0 else
    # Hoist the mean out of the labelling step: the original recomputed
    # df['impact'].mean() inside a per-row lambda (accidental O(n^2)).
    threshold = df['impact'].mean()
    y = (df['impact'] >= threshold).astype(int)  # 1 = at/above mean impact
    x = df.iloc[:, 36:]  # feature columns start at index 36 — TODO confirm layout
    print(x.shape, y.shape)

    x_train, x_test, y_train, y_test = train_test_split(x, y)
    logistic.fit(x_train, y_train)
    predictions = logistic.predict(x_test)
    print(classification_report(y_test, predictions))

    joblib.dump(logistic, MODEL_FILE)
def model():
    """Train the impact classifier and return its per-feature coefficients.

    Same pipeline as main(): load or create the model, ensure the feature CSV
    exists, label rows by mean impact, fit, print a classification report, and
    persist the model.

    Returns:
        dict: feature column name -> fitted logistic-regression coefficient.
    """
    if os.path.exists(MODEL_FILE):
        logistic = joblib.load(MODEL_FILE)
    else:
        logistic = LogisticRegression(penalty='l2')

    if not os.path.exists(DATA_FILE):
        feature_extraction.main('blog post', 'glamour')

    df = pd.read_csv(DATA_FILE)
    df = df.fillna(0)

    # Compute the mean once instead of once per row inside a lambda
    # (the original apply() made labelling accidentally O(n^2)).
    threshold = df['impact'].mean()
    y = (df['impact'] >= threshold).astype(int)  # 1 = at/above mean impact
    x = df.iloc[:, 36:]  # feature columns start at index 36 — TODO confirm layout

    x_train, x_test, y_train, y_test = train_test_split(x, y)
    logistic.fit(x_train, y_train)
    predictions = logistic.predict(x_test)
    print(classification_report(y_test, predictions))

    joblib.dump(logistic, MODEL_FILE)

    # Map each feature column to its coefficient in the fitted model.
    features = dict(zip(x.columns, list(logistic.coef_[0])))
    return features
def predict_audio(clf_and_scaler_folder, used_features, clf_to_use):
    """Classify 1-second audio frames as music and return the music fraction.

    Args:
        clf_and_scaler_folder: directory holding the pickled 'scaler.sav' and
            'finalized_model.sav' artifacts.
        used_features: feature selection forwarded to fex.main.
        clf_to_use: classifier identifier forwarded to fex.main.

    Returns:
        tuple: (likely_to_be_music, new_predicted) — the fraction of frames
        predicted as music, and the outlier-smoothed per-frame predictions.
    """
    output_path = config['OUTPUT_FOLDER']

    # From csv files get features from 1 second frames
    features = fex.main(output_path, used_features, clf_to_use)

    # Use context managers so the file handles are closed deterministically
    # (the originals were opened via pickle.load(open(...)) and never closed).
    # NOTE(review): pickle.load assumes these files are trusted — do not point
    # this at untrusted artifacts.
    scaler_file = clf_and_scaler_folder + '/' + 'scaler.sav'
    with open(scaler_file, 'rb') as fh:
        scaler = pickle.load(fh)

    clf_file = clf_and_scaler_folder + '/' + 'finalized_model.sav'
    with open(clf_file, 'rb') as fh:
        clf = pickle.load(fh)

    scaled_data = scaler.transform(features)
    predicted = clf.predict(scaled_data)

    new_predicted = list(remove_outliers(predicted))
    # Fraction of frames labelled 1 (music); guard against an empty frame list
    # instead of raising ZeroDivisionError.
    if new_predicted:
        likely_to_be_music = sum(new_predicted) / len(new_predicted)
    else:
        likely_to_be_music = 0.0
    return (likely_to_be_music, new_predicted)
import comp_dataset
import preproc
import feature_extraction
import experiment1
import experiment2
import experiment3
import experiment4

# Run the full pipeline end to end: build the dataset, preprocess, extract
# features, then run each experiment.
# FIX: the original called build_dataset.main() and preprocessing.main(),
# which are never imported (immediate NameError); the imported modules are
# comp_dataset and preproc.
comp_dataset.main()
preproc.main()
feature_extraction.main()
experiment1.main()
experiment2.main()
experiment3.main()
experiment4.main()
import feature_extraction

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; prefer the standalone joblib package, falling back for old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Load the pre-trained classifier from disk.
classifier = joblib.load('model/trained.pkl')

# Prompt the user for a URL, extract its features, and print the prediction.
print('[+]Enter URL:')
url = input()
check = feature_extraction.main(url)

prediction = classifier.predict(check)
print(prediction)