import glob
import os
import pickle

from music21 import converter, instrument

# `directory` (the MIDI folder name) and `clean` (the note-preprocessing
# helper) are assumed to be defined elsewhere in this module.


def get_notes(note_width=.25):
    # If a pickled result exists, return that:
    # if os.path.isfile('pickle/' + directory + '_notes'):
    #     return pickle.load(open('pickle/' + directory + '_notes', 'rb'))

    # Otherwise, parse every MIDI file, group its parts by instrument,
    # and pickle the result.
    piano = []
    bass = []
    sax = []
    song_lens = []
    songs = {}
    num_songs = 0
    for file in glob.glob(directory + "/*.mid"):
        try:
            midi = converter.parse(file)
        except Exception:
            print('Could not parse file: {}'.format(file))
            continue
        print("Parsing %s" % file)
        num_songs += 1
        songs[file] = {}
        instruments = instrument.partitionByInstrument(midi)
        song_lens.append(midi.highestTime)
        for i in instruments:
            name = i.getInstrument().instrumentName
            if name == 'Piano':
                songs[file]['piano'] = i
                piano.append(i)
            elif name == 'Acoustic Bass':
                songs[file]['bass'] = i
                bass.append(i)
            elif name == 'Saxophone':
                songs[file]['sax'] = i
                sax.append(i)

    enumerated_notes, embedded = clean(songs, song_lens, note_width)
    # Keep only songs where all three instrument parts were found.
    output = [s for s in embedded if len(s) == 3]

    pickle.dump(enumerated_notes,
                open('pickle/' + directory + '_encodings', 'wb'))
    pickle.dump(output, open('pickle/' + directory + '_embedded', 'wb'))
    return enumerated_notes, output
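# Usage sketch (hypothetical, not part of the original module): point
# `directory` at a folder of .mid files and inspect what comes back.
if __name__ == '__main__':
    directory = 'midi_songs'  # hypothetical folder name
    enumerated_notes, embedded = get_notes(note_width=.25)
    print('distinct note encodings:', len(enumerated_notes))
    print('songs with all three parts:', len(embedded))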
import pandas as pd

import clean_data  # project module providing clean(df_train, df_test, grid_num)


def load(grid_num):
    df_train = pd.read_csv('train.csv', delimiter=',', parse_dates=True,
                           index_col="Unnamed: 0")
    df_train.dataframeName = 'train.csv'
    df_test = pd.read_csv("test.csv", delimiter=",", parse_dates=True,
                          index_col="Unnamed: 0")
    df_test.dataframeName = 'test.csv'
    x_train, y_train, x_test, y_test = clean_data.clean(
        df_train, df_test, grid_num)
    return x_train, y_train, x_test, y_test
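# Usage sketch (hypothetical): grid_num=0 is an arbitrary example value.
if __name__ == '__main__':
    x_train, y_train, x_test, y_test = load(grid_num=0)
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)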
def predict(test, xgb_model):
    # with open('vec_model.pkl', 'rb') as f:
    #     vec_model = pickle.load(f)

    # `clean` is the project's feature-engineering helper, defined elsewhere.
    X_xgb = clean(test, isTrain=False, isjson=True)
    # X_vec = clean_desc(test_file, isTrain=False)

    # Positive-class probabilities from the fitted XGBoost model.
    xgb_preds = xgb_model.predict_proba(X_xgb)[:, 1]
    # vec_preds = vec_model.predict_proba(X_vec)[:, 1]
    # preds = (xgb_preds + vec_preds) / 2
    # return X_xgb, X_vec, preds
    return X_xgb, xgb_preds
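# Usage sketch (hypothetical file names; the original snippet does not show
# how the fitted model or the test records are loaded).
if __name__ == '__main__':
    import json
    import pickle

    with open('xgb_model.pkl', 'rb') as f:
        xgb_model = pickle.load(f)
    with open('test.json') as f:
        test = json.load(f)
    X_xgb, xgb_preds = predict(test, xgb_model)
    print(xgb_preds[:10])  # positive-class probabilities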
import os
import pprint

import pyLDAvis
import pyLDAvis.gensim
from gensim.models.ldamodel import LdaModel
from gensim.test.utils import datapath


def train_model(texts):
    # (The start of this function is not shown in the original snippet;
    # `corpus` and `dictionary` are built from `texts` before this point.)

    # Load a potentially pretrained model from disk. Note: checking the
    # file's existence, not the truthiness of the path string.
    temp_file = datapath("model_small")
    if os.path.isfile(temp_file):
        lda_model = LdaModel.load(temp_file)
    else:
        # Train the model.
        lda_model = LdaModel(corpus, num_topics=10, id2word=dictionary)
        pprint.pprint(lda_model.top_topics(corpus, topn=5))
        # Save the model to disk.
        lda_model.save(temp_file)
    return lda_model, corpus, dictionary


def visualize_pyldavis(lda_model, corpus, dictionary):
    prepared = pyLDAvis.gensim.prepare(lda_model, corpus, dictionary)
    pyLDAvis.save_html(prepared, 'vis_topic_model_02.html')
    pyLDAvis.show(prepared)


if __name__ == '__main__':
    # `clean` (defined elsewhere) returns the list of docs as lists of strings.
    texts = clean('voted-kaggle-dataset.csv')
    lda_model, corpus, dictionary = train_model(texts)
    # print(lda_model.show_topics())
    visualize_pyldavis(lda_model, corpus, dictionary)
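# For context, a minimal sketch (an assumption, not the project's code) of
# how `corpus` and `dictionary` are typically built from tokenized texts
# with gensim before the training step in train_model above.
from gensim.corpora import Dictionary


def build_corpus(texts):
    # texts: list of documents, each a list of token strings.
    dictionary = Dictionary(texts)
    # Bag-of-words: each document becomes a list of (token_id, count) pairs.
    corpus = [dictionary.doc2bow(doc) for doc in texts]
    return corpus, dictionary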
# CraigsRecommendation
# created by Mikaela Hoffman-Stapleton and Arda Aysu
from flask import Flask, request
import pandas as pd
import re
from datetime import datetime
from clean_data import clean
from filter_data import filter
from cluster_data import cluster
from gmaps_fns import *
from webpage import searchpage, recpage
# import json  # for demo

pd.set_option('display.max_colwidth', -1)

listings = clean('craigslist.csv')
listings = listings.assign(index=range(len(listings)))

# For demo data:
# gmaps = pd.read_csv('gmaps.csv')
# listings = pd.merge(listings, gmaps, how='outer', on='index')
# with open('places_demo.json') as data_file:
#     places = json.load(data_file)
# with open('distances_demo.json') as data_file:
#     distances = json.load(data_file)

app = Flask(__name__)


@app.route('/')
def search_page():
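# Hypothetical entry point, commented out because the module is truncated
# above (search_page has no body here); the standard way to run the Flask
# development server locally would be:
# if __name__ == '__main__':
#     app.run(debug=True)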