def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = data["steering_angle"]
        # The current throttle of the car
        throttle = data["throttle"]
        # The current speed of the car
        speed = data["speed"]
        # The current image from the center camera of the car
        imgString = data["image"]
        image = Image.open(BytesIO(base64.b64decode(imgString)))
        image_array = np.asarray(image)
        image_array = functions.preprocess(image_array)
        # Predict on a single-image batch
        steering_angle = float(
            model.predict(image_array[None, :, :, :], batch_size=1))
        throttle = controller.update(float(speed))
        print(steering_angle, throttle)
        send_control(steering_angle, throttle)

        # save frame (keep `image` a PIL object so .save() works)
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
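# send_control is not shown in this excerpt; below is a minimal sketch of the
# usual socketio helper such driving scripts pair with telemetry(). The
# 'steer' event name and payload keys are assumptions, not confirmed here.
def send_control(steering_angle, throttle):
    sio.emit(
        'steer',
        data={
            'steering_angle': str(steering_angle),
            'throttle': str(throttle),
        },
        skip_sid=True)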
def main():
    # Load training data from csv (dtype=str replaces the removed np.str alias)
    filename = 'train_data.csv'
    raw_data = np.loadtxt(filename, dtype=str, delimiter=",")
    image_paths = raw_data[:, 0]
    labels = raw_data[:, 1]

    # One-hot encode labels
    lb = preprocessing.LabelBinarizer()
    one_hot_labels = lb.fit_transform(labels)
    # Save transformation matrix
    functions.save_object(lb, 'one-hot-matrix.pkl')

    # Load training images from directory
    images = []
    for path in image_paths:
        images.append(imread(path))
    images = np.array(images)

    # pre-process images
    images = np.array([functions.preprocess(img) for img in images])

    # Split data into training and validation sets
    x_train, x_valid, y_train, y_valid = train_test_split(
        images, one_hot_labels, test_size=VALIDATION_SPLIT)

    # Augment training set with rotated and flipped images
    x_train, y_train = functions.augment_dataset(x_train, y_train)
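# functions.save_object (used above) and functions.load_object (used by the
# webcam script below) are not shown in this excerpt; a minimal pickle-based
# sketch consistent with how they are called. Their real implementation may
# differ.
import pickle

def save_object(obj, filename):
    # Serialize any picklable object (here, the fitted LabelBinarizer)
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)

def load_object(filename):
    # Restore an object saved by save_object
    with open(filename, 'rb') as f:
        return pickle.load(f)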
def preprocessing_test(mat):
    """
    Checks if a matrix matches its original version after being
    preprocessed and reverse_preprocessed.
    """
    means, stds, maxes, temp = f.preprocess(mat)
    temp2 = f.reverse_preprocess(means, stds, maxes, temp)
    diff = np.round(mat - temp2, 10)
    return np.all(diff == 0.)
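# Hedged usage sketch: assumes `f` is this project's functions module and that
# preprocess/reverse_preprocess round-trip a 2-D float matrix.
if __name__ == '__main__':
    sample = np.random.rand(100, 8)
    print('round-trip OK:', preprocessing_test(sample))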
def main():
    positive_set = 'test_extractions/bc_samples.txt'
    # alternative: 'test_extractions/test-neural-hash-samples.txt'
    negative_set = 'test_extractions/bc_grounds.txt'
    # alternative: 'test_extractions/test-neural-hash-ground.txt'
    analogy_list = functions.get_list_re(positive_set)
    non_analogy_list = functions.get_list_re(negative_set)
    samples = [(text, 'YES') for text in analogy_list] + \
              [(text, 'NO') for text in non_analogy_list]
    train_data, train_labels, test_data, test_labels = functions.preprocess(
        samples, 0.5)

    classifiers = ['svc', 'linearsvc', 'nusvc', 'naive', 'maxEnt', 'neural']
    classifiers2 = ['neural']  # unused alternative shortlist
    representations = ['tfidf', 'count', 'hash']
    representations2 = ['hash']  # unused alternative shortlist

    for classifier in classifiers:
        for representation in representations:
            pipeline = Pipeline([
                (representation, helpers.get_function(representation)),
                (classifier, helpers.get_function(classifier)),
            ])
            parameters = helpers.generate_parameters(representation, classifier)
            grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1,
                                       verbose=1, error_score=-1)

            print("Performing grid search...")
            print("pipeline:", [name for name, _ in pipeline.steps])
            print("parameters:")
            pprint(parameters)
            t0 = time()
            grid_search.fit(train_data, train_labels)
            print("done in %0.3fs" % (time() - t0))
            print()

            print("Best score: %0.3f" % grid_search.best_score_)
            print("Best parameters set:")
            best_parameters = grid_search.best_estimator_.get_params()
            for param_name in sorted(parameters.keys()):
                print("\t%s: %r" % (param_name, best_parameters[param_name]))
            print()

            print("Getting the confusion matrix for the best estimator:")
            prediction = grid_search.best_estimator_.predict(test_data)
            matrix = confusion_matrix(test_labels, prediction,
                                      labels=['YES', 'NO'])
            precision, recall, f_measure = functions.fmeasure(matrix)
            accuracy = accuracy_from_matrix(matrix)
            print("Accuracy ", accuracy)
            print("Precision, recall, f-score:")
            print(precision, recall, f_measure)
            print(matrix)
            print()
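# accuracy_from_matrix is referenced above but not defined in this excerpt.
# A minimal sketch consistent with its use on a 2x2 confusion matrix:
# accuracy is the trace (correct predictions) over the total count.
def accuracy_from_matrix(matrix):
    matrix = np.asarray(matrix, dtype=float)
    return np.trace(matrix) / matrix.sum()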
def analogy_pipeline(positive_set,
                     negative_set,
                     percent_test,
                     representation,
                     classifier,
                     seed,
                     extra={"sub_class": ""},
                     timer=1000000000):
    start = time.time()
    # Read in the set of positive examples
    analogy_list = functions.get_list_re(positive_set)
    # Read in the set of negative examples
    non_analogy_list = functions.get_list_re(negative_set)
    # Randomly divide them into a training set and a test set
    nan_set = functions.read_CSV('corpora/dmb_open_test.csv', 1)
    samples = [(text, 'YES') for text in analogy_list] + \
              [(text, 'NO') for text in non_analogy_list] + \
              [(txt, 'NO') for txt in nan_set]
    bt_parsed = functions.readCSV('base_target.csv', 1)
    extra = functions.set_extra(extra)
    num_samples = min(len(analogy_list), len(non_analogy_list))

    # Run the classifier; generate results based on the value passed in for
    # representation
    beginTimer = time.time()
    train_data, train_labels, test_data, test_labels = functions.preprocess(
        bt_parsed, percent_test, seed, num_samples,
        'test_main_interface_output')

    # Save this seed's test set so the run can be reproduced
    seed = (seed - 1000) / 30
    dic = {'data': []}
    for dat in test_data:
        dic['data'].append(dat)
    pd.DataFrame(dic, columns=['data']).to_csv(
        './testing/test_set' + str(int(seed)) + '.csv')

    # train_data = functions.strip_id(train_data)
    # test_data = functions.strip_id(test_data)

    # Make sure the classifier runs within a set time (timer)
    score, matrix, precision, recall, f_measure = functions.classify_pipeline(
        train_data, train_labels, test_data, test_labels, classifier,
        representation, seed, extra, timer)
    print(score)
    print(matrix)
    print(precision, recall, f_measure)
    return score, f_measure
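# Hedged usage example: the data paths mirror the other scripts in this repo,
# and the seed/percent_test/classifier values are placeholders, not canonical
# settings from the original project.
if __name__ == '__main__':
    score, f_measure = analogy_pipeline(
        'test_extractions/bc_samples.txt',
        'test_extractions/bc_grounds.txt',
        percent_test=0.5,
        representation='tfidf',
        classifier='svc',
        seed=1000)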
def main():
    # Load one-hot-encoding matrix
    labeler = functions.load_object('one-hot-matrix.pkl')
    # Load the trained model
    model = load_model('model.h5')
    # Define the webcam object
    cam = cv2.VideoCapture(0)

    while True:
        # Capture video frame
        ret, frame = cam.read()
        if not ret:
            break
        # Preprocess frame
        image = functions.preprocess(frame)
        # Predict object in frame (the model expects a batch dimension)
        logits = model.predict(image[None, :, :, :], batch_size=1)
        # Decode logits back to the original class label
        result = labeler.inverse_transform(logits)
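        # Hedged continuation (assumption): show the live prediction and let
        # the user quit with 'q'; the original loop has no exit condition.
        print(result)
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Assumed cleanup once the loop ends
    cam.release()
    cv2.destroyAllWindows()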
# Check the environment
warnings.simplefilter('ignore')
np.random.seed(0)
if six.PY3:
    tff.framework.set_default_executor(tff.framework.create_local_executor())
tff.federated_computation(
    lambda: 'The tensorflow federated environment is correctly setup!')()

# Load the data
emnist_train, emnist_test = load_data(path)

# Generate sample batch
example_dataset = emnist_train.create_tf_dataset_for_client(
    emnist_train.client_ids[1])
example_element = next(iter(example_dataset))
preprocessed_example_dataset = preprocess(example_dataset, NUM_EPOCHS,
                                          SHUFFLE_BUFFER, BATCH_SIZE)
sample_batch = tf.nest.map_structure(
    lambda x: x.numpy(), next(iter(preprocessed_example_dataset)))

# Create federated data for each client
sample_clients = emnist_train.client_ids[0:NUM_CLIENTS]
federated_train_data = make_federated_data(emnist_train, sample_clients,
                                           NUM_EPOCHS, SHUFFLE_BUFFER,
                                           BATCH_SIZE)

# Function to create tff.learning instances
def model_fn():
    keras_model = create_compiled_keras_model()
    return tff.learning.from_compiled_keras_model(keras_model, sample_batch)
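# A hedged sketch of the next step with the legacy TFF Federated Averaging API
# that matches from_compiled_keras_model above; NUM_ROUNDS is an assumed value,
# not taken from the original script.
iterative_process = tff.learning.build_federated_averaging_process(model_fn)
state = iterative_process.initialize()
NUM_ROUNDS = 10  # assumption
for round_num in range(1, NUM_ROUNDS + 1):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))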
def extract_patches(self, h5db, new_folder):
    # Guard (assumption): the original unconditionally printed and returned
    # here; this keeps that behaviour behind an availability check, assuming
    # the module sets `openslide = None` when the import fails.
    if openslide is None:
        print('OpenSlide needed to extract patches.')
        return None

    # Legacy per-XML loop, kept commented out:
    # for centre in self.centres:
    #     print('[cnn][patch_extraction] Selected Centre: ', centre)
    #     # each centre may have more than one annotation XML file, so here we
    #     # retrieve a list of all the XMLs related to the current centre
    #     annotation_list = np.sort(
    #         self.get_annotation_list(centre, self.xml_source_fld))
    #     # for each XML file in the annotation list we want to extract tumor
    #     # and normal patches
    #     for xml_file in annotation_list:
    #         files_counter += 1  # variable to shape the final data vector

    print('[debug] ', self.name)
    print('[debug] ', self.settings)
    self.set_files_counter(self.count_annotation_files())
    print('[dataset] {0} [extract_patches] {1} total annotation files.'.format(
        self.name, self.files_counter))

    for centre in self.centres:
        annotation_list = self.get_annotation_list(centre)
        for xml_file in annotation_list:
            slide_path = self.get_wsi_path(centre, xml_file)
            xml_path = os.path.join(self.xml_source_fld, xml_file)
            # retrieving the information about the file analysed.
            # info is a dictionary with the following keys:
            #   info['centre'], current centre number
            #   info['patient'], current patient number
            #   info['node'], current WSI node
            info = self.get_info(xml_path, centre)
            # functions.setDBHierarchy(h5db, self.settings, info)
            if info['patient'] == '008_Mask.tif':
                continue
            if xml_path is not None:
                # [TO-DO] check that the slide is open and OK.
                # preprocess takes the WSI path and the slide_level and returns
                # the WSI openslide obj, the tumor annotation mask, the WSI
                # image and the tumor contours
                if self.name == 'camelyon16':
                    # The slide and its RGB image were commented out in the
                    # original but are needed below, so they are restored here.
                    slide = openslide.OpenSlide(slide_path)
                    rgb_im = np.array(
                        slide.read_region((0, 0), 7,
                                          slide.level_dimensions[7]))
                    annotations = np.asarray(
                        openslide.OpenSlide(xml_path).read_region(
                            (0, 0), 7, slide.level_dimensions[7]))
                    annotations_mask = annotations[:, :, 0]
                    im_contour = rgb_im
                else:
                    slide, annotations_mask, rgb_im, im_contour = functions.preprocess(
                        slide_path, xml_path,
                        slide_level=self.settings['slide_level'])

                tum_patch_list, tum_patch_point = integral.patch_sampling_using_integral(
                    slide, self.settings['slide_level'], annotations_mask,
                    self.settings['patch_size'], self.settings['n_samples'])
                # conversion of the lists to np arrays
                tum_patch_array = np.asarray(tum_patch_list)
                tum_locations = np.array(tum_patch_point)
                # storage in the HDF5 db
                self.store(h5db, info, tum_patch_array, tum_locations, 'tumor')

                # reverting the tumor mask to find normal tissue and extract
                # patches. Note:
                #   normal_mask = tissue mask (morp_im) - tumor mask (annotations_mask)
                morp_im = functions.get_morp_im(rgb_im)
                normal_im = morp_im - annotations_mask  # np.min(normal_im) := -1.0
                normal_im = normal_im == 1.0
                normal_im = normal_im.astype(int)
                # sampling normal patches with uniform distribution
                nor_patch_list, nor_patch_point = integral.patch_sampling_using_integral(
                    slide, self.settings['slide_level'], normal_im,
                    self.settings['patch_size'], self.settings['n_samples'])
                nor_patch_array = np.asarray(nor_patch_list)
                normal_patches_locations = np.array(nor_patch_point)
                # storing the normal patches and their locations
                self.store(h5db, info, nor_patch_array, nor_patch_point,
                           'normal')

                # Visualisation:
                # tumor_locations.png shows the tumor patch locations in red
                # and the normal patch locations in green
                tumor_locations_im = rgb_im
                plt.figure()
                plt.imshow(tumor_locations_im)
                for p_x, p_y in normal_patches_locations:
                    plt.scatter(p_y, p_x, c='g')
                for p_x, p_y in tum_locations:
                    plt.scatter(p_y, p_x, c='r')
                print('[cnn][patch_extraction] Saving tumor locations image')
                plt.savefig(
                    os.path.join(
                        new_folder,
                        'level{}_centre{}_patient{}_node{}_tumor_locations.png'.format(
                            self.settings['slide_level'], info['centre'],
                            info['patient'], info['node'])))
                plt.close()

                print('[cnn][patch_extraction] Saving annotation mask and '
                      'normal tissue mask')
                plt.figure()
                plt.imshow(annotations_mask)
                plt.savefig(
                    os.path.join(
                        new_folder,
                        'level{}_centre{}_patient{}_node{}_annotation_mask.png'.format(
                            self.settings['slide_level'], info['centre'],
                            info['patient'], info['node'])))
                plt.close()

                plt.figure()
                plt.imshow(normal_im)
                plt.savefig(
                    os.path.join(
                        new_folder,
                        'level{}_centre{}_patient{}_node{}_normal_tissue_mask.png'.format(
                            self.settings['slide_level'], info['centre'],
                            info['patient'], info['node'])))
                plt.close()
                plt.close('all')

                self.tum_counter += len(tum_patch_array)
                self.nor_counter += len(nor_patch_array)
    return
def analogy_trial(positive_set,
                  negative_set,
                  percent_test,
                  representation,
                  classifier,
                  extra={"sub_class": ""},
                  timer=1000000000,
                  comment=""):
    caller = inspect.stack()[1][3]
    start = time.time()
    # Read in the set of positive examples
    analogy_list = functions.get_list_re(positive_set)
    # Read in the set of negative examples
    non_analogy_list = functions.get_list_re(negative_set)
    # Randomly divide them into a training set and a test set
    samples = [(text, 'YES') for text in analogy_list] + \
              [(text, 'NO') for text in non_analogy_list]
    extra = functions.set_extra(extra)

    # Run the classifier; generate results based on the value passed in for
    # representation
    beginTimer = time.time()
    now = time.strftime("%c")
    currentTime = now
    now = now.replace(" ", "_")
    now = now.replace(":", "")
    train_data, train_labels, test_data, test_labels = functions.preprocess(
        samples, percent_test, caller)

    # Make sure the classifier runs within a set time
    try:
        score, matrix, precision, recall, f_measure = functions.classify(
            train_data, train_labels, test_data, test_labels, classifier,
            representation, extra, timer)
    # catch the timeout error
    except timeout.TimeoutError:
        print("Classifier timeout.")
        print("Output error in log.")
        algoTime = time.time() - beginTimer
        runTime = time.time() - start
        outputData = [
            currentTime, positive_set, negative_set, percent_test,
            representation, classifier, extra, "", "", "", "", "", "", "",
            "Algorithm Timeout"
        ]
    else:
        algoTime = time.time() - beginTimer
        runTime = time.time() - start
        outputData = [
            currentTime, positive_set, negative_set, percent_test,
            representation, classifier, extra, score, matrix, precision,
            recall, f_measure, runTime, algoTime, comment
        ]

    # Store results
    outputResults(outputData)
    if caller != "test_main_interface_output":
        print("Successfully logged trial results")

    # Keep just [score, matrix, precision, recall, f_measure]
    outputData = outputData[7:-3]
    # The confusion matrix is a numpy array; convert it for logging. Guard
    # against the timeout case, where the slot holds an empty string.
    if hasattr(outputData[1], 'tolist'):
        outputData[1] = outputData[1].tolist()
    return outputData
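# outputResults is referenced above but not defined in this excerpt; a minimal
# sketch, assuming trials are appended as rows to a CSV log (the file name is
# a placeholder).
import csv

def outputResults(row, log_path='trial_results.csv'):
    with open(log_path, 'a', newline='') as log:
        csv.writer(log).writerow(row)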
def extract_patches(self):
    """Extract tumor and normal patches for every patient of every centre
    and store them, batch by batch, in a per-patient HDF5 DB together with
    the annotation/tissue mask images."""
    errors = 0
    warnings = 0
    settings = self.config['settings']

    for centre in self.centres:
        for patient in self.get_patients(centre):
            self.logger.info('processing patient: {}'.format(patient))
            slide_path = self.get_wsi_path(centre, patient)
            xml_path = self.get_annotation_path(centre, patient)
            info = self.get_info(centre, patient)

            pat_res_dir = self.make_patient_dir(info)
            if not pat_res_dir:
                self.logger.error(
                    "patient {}: problems with results dir...".format(patient))
                errors += 1
                continue

            h5db_path = os.path.join(pat_res_dir, self.h5db_bname + '.h5')
            try:
                h5db = hd.File(h5db_path, 'w')
            except Exception as e:
                self.logger.error(
                    "patient {}: can't open my H5 DB '{}': {}".format(
                        patient, h5db_path, e))
                errors += 1
                continue

            slide, annotations_mask, rgb_im, im_contour = preprocess(
                slide_path, xml_path, slide_level=settings['slide_level'])

            # reverting the tumor mask to find normal tissue and extract
            # patches. Note:
            #   normal_mask = tissue mask (morp_im) - tumor mask (annotations_mask)
            morp_im = get_morp_im(rgb_im)
            normal_im = morp_im - annotations_mask  # np.min(normal_im) := -1.0
            normal_im = normal_im == 1.0
            normal_im = normal_im.astype(int)

            # masks are the same for any sample batch ;-)
            # [TO-DO] make switchable from config/CL
            plt.figure()
            plt.imshow(annotations_mask)
            img_file = self.get_image_fname(pat_res_dir, 'annotation_mask',
                                            info)
            plt.savefig(img_file)
            plt.close()
            self.logger.info(
                'patient {}: annotation mask image saved to: {}'.format(
                    patient, img_file))

            plt.figure()
            plt.imshow(normal_im)
            img_file = self.get_image_fname(pat_res_dir, 'normal_tissue_mask',
                                            info)
            plt.savefig(img_file)
            plt.close()
            self.logger.info(
                'patient {}: normal tissue mask image saved to: {}'.format(
                    patient, img_file))

            opts = {k: settings[k] for k in (
                'area_overlap',
                'bad_batch_size',
                'gray_threshold',
                'margin_width_x',
                'margin_width_y',
                'method',
                'n_samples',
                'patch_size',
                'slide_level',
                'white_level',
                'white_threshold',
                'white_threshold_incr',
                'white_threshold_max',
            )}

            # Batch sample & store -- keep batches small to avoid OOM! In
            # "linear" sampling mode, more batches might be needed, so go for
            # a run, get the extracted patches and the last index, and loop
            # until no patches come out.
            # [TO-DO] store info in _per-patient_ H5 DB
            #
            # In a patient case (:= slide) the tumor annotation mask is
            # usually (much) smaller than the normal tissue mask, thus a
            # different number of batches is needed to extract all the tumor
            # and normal patches. So we compute the normal tissue mask once.
            # Apart from that, there's no relation between tumor and normal
            # patches, hence we batch-loop two times: once for the normal case
            # and once for the tumor case. N.B. In 'random' sampling mode,
            # just one batch is ever done.
            bcnt_t, bcnt_n = 0, 0
            last_idx_t = last_idx_n = -1
            if settings['window']:
                self.logger.info(
                    "patient {}: restricting nonzero points range to {}%, {}%".format(
                        patient, settings['window'][0],
                        settings['window'][1]))
            nzx_n, nzy_n = integral.nonzero_range(normal_im,
                                                  settings['window'])

            # *** Warning! *** Split loops don't work if we want to show
            # images: there's a data dependency on "normal_patches_locations".

            # normal tissue
            while True:
                self.logger.info(
                    "patient {}: >>> [normal] starting batch {}".format(
                        patient, bcnt_n))
                opts['start_idx'] = last_idx_n + 1
                nor_patch_list, nor_patch_point, last_idx_n = integral.patch_sampling(
                    slide, normal_im, nzx_n, nzy_n, **opts)
                if nor_patch_point and nor_patch_list:
                    nor_patch_array = np.asarray(nor_patch_list)
                    normal_patches_locations = np.array(nor_patch_point)
                    self.store_patient(info, nor_patch_array, nor_patch_point,
                                       'normal', h5db, bcnt_n)
                else:
                    self.logger.info(
                        'patient {}: batch {}: no (more) normal patches'.format(
                            patient, bcnt_n))
                    break

                self.nor_counter += len(nor_patch_array)
                self.logger.info(
                    "patient {}: <<< [normal] done batch {}".format(
                        patient, bcnt_n))
                if last_idx_n is None:
                    # in 'random' method, this tells us that we're done sampling
                    break
                bcnt_n += 1
            # {end-while}

            # TO-DO: batch runs should be better encapsulated (aux fun/method)...
            # tumor masks are usually too small for windowed sampling, so take
            # the full range
            nzx_t, nzy_t = integral.nonzero_range(annotations_mask, [])
            while True:
                self.logger.info(
                    "patient {}: >>> [tumor] starting batch {}".format(
                        patient, bcnt_t))
                opts['start_idx'] = last_idx_t + 1
                tum_patch_list, tum_patch_point, last_idx_t = integral.patch_sampling(
                    slide, annotations_mask, nzx_t, nzy_t, **opts)
                if tum_patch_list and tum_patch_point:
                    tum_patch_array = np.asarray(tum_patch_list)
                    tum_locations = np.array(tum_patch_point)
                    self.store_patient(info, tum_patch_array, tum_locations,
                                       'tumor', h5db, bcnt_t)
                else:
                    self.logger.info(
                        'patient {}: batch {}: no (more) tumor patches'.format(
                            patient, bcnt_t))
                    break

                if opts['method'] == 'random':
                    if bcnt_n != bcnt_t:
                        self.logger.error(
                            "[BUG] Can't make scatter image(s): batch count mismatch")
                        errors += 1
                    else:
                        # plotting the tumor locations in the XML file and
                        # drawing the normal patch sampling points:
                        # tumor_locations.png shows the tumor patch locations
                        # in red and the normal patch locations in green
                        tumor_locations_im = rgb_im
                        plt.figure()
                        plt.imshow(tumor_locations_im)
                        # Warning! Data dependency on previous normal batch run
                        for p_x, p_y in normal_patches_locations:
                            plt.scatter(p_y, p_x, c='g')
                        for p_x, p_y in tum_locations:
                            plt.scatter(p_y, p_x, c='r')
                        img_file = self.get_image_fname(
                            pat_res_dir, 'tumor_locations', info, bcnt_t)
                        plt.savefig(img_file)
                        plt.close()
                        self.logger.info(
                            'patient {}: batch {}: tumor locations image saved to: {}'.format(
                                patient, bcnt_t, img_file))

                self.tum_counter += len(tum_patch_array)
                self.logger.info(
                    "patient {}: <<< [tumor] done batch {}".format(
                        patient, bcnt_t))
                if last_idx_t is None:
                    # in 'random' method, this tells us that we're done sampling
                    break
                bcnt_t += 1
            # {end-while}

            h5db.close()
            self.logger.info(
                "patient {}: processed in {} (normal) + {} (tumor) batches".format(
                    patient, bcnt_n, bcnt_t))
            self.logger.info("patient {}: data saved to H5 DB: {}".format(
                patient, h5db_path))
        # {end-for-patient}
    # {end-for-centre}

    self.report['errors'] = errors
    self.report['warnings'] = warnings
else:
    logging.basicConfig(level=logging.INFO)
    if v:
        logging.info(f"using {m}.{e} model to calculate submitid {i}")

    # load word embedding model
    start = datetime.now()
    vectors = load_model(m, e)
    if v:
        logging.info(f"model loaded in {datetime.now() - start}")

    # get source code and problem text from the database that corresponds
    # with the input submit ID
    code, problem = get(i)

    # preprocessing includes normalization and tokenization
    if v:
        logging.info("preprocessing code and problem text...")
    problem_processed, comments_processed, code_only = preprocess(
        problem, code)

    # count words in code comments
    comment_word_count_raw = 0
    for line in comments_processed:
        comment_word_count_raw += len(line)
    if v:
        logging.info("preprocessing finished")

    # calculate code density
    if v:
        logging.info("calculating code density...")
    comment_line_density, comment_char_density = calculate_density(
        comments_processed, code_only)
    if v:
        logging.info("finished calculating")

    # calculate code header score
    if v:
        logging.info("calculating header score...")
    header_score = calculate_header_score(comments_processed)
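# Hedged sketch of calculate_density, assuming "density" means the share of
# comment lines/characters relative to the code-only lines/characters and
# that comments_processed holds tokenized lines while code_only holds raw
# lines; the real implementation may differ.
def calculate_density(comments_processed, code_only):
    comment_lines = len(comments_processed)
    code_lines = max(len(code_only), 1)
    comment_chars = sum(len(' '.join(line)) for line in comments_processed)
    code_chars = max(sum(len(line) for line in code_only), 1)
    return comment_lines / code_lines, comment_chars / code_chars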
def main():
    positive_set = '../latest_analogy/test_extractions/bc_samples.txt'
    # alternative: 'test_extractions/test-neural-hash-samples.txt'
    negative_set = '../latest_analogy/test_extractions/bc_grounds.txt'
    # alternative: 'test_extractions/test-neural-hash-ground.txt'
    analogy_list = functions.get_list_re(positive_set)
    non_analogy_list = functions.get_list_re(negative_set)
    samples = [(text, 1) for text in analogy_list] + \
              [(text, 0) for text in non_analogy_list]
    train_data, train_labels, test_data, test_labels = functions.preprocess(
        samples, 0.15)

    overlap_input = [('LP', 'count'), ('TSVM', 'tfidf')]

    # Hide 70% of the training labels (-1 marks unlabeled samples)
    rng = np.random.RandomState(42)
    random_unlabeled_points = rng.rand(len(train_labels)) < 0.7
    train_labels = np.array(train_labels)
    test_labels = np.array(test_labels)
    train_labels[random_unlabeled_points] = -1
    train_data = np.array(train_data)

    prediction_second_input = []
    no_as_yes = []  # predictions with label NO classified with label YES
    yes_as_no = []  # predictions with label YES classified with label NO
    count = 0
    for element in overlap_input:
        pipeline = helpers.get_function(element[0])
        representation = helpers.get_function(element[1])
        parameters = helpers.get_parameters(element[0])
        train_set = representation.fit_transform(train_data).toarray()
        test_set = representation.transform(test_data).toarray()
        grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1,
                                   verbose=10, error_score=-1)
        grid_search.fit(train_set, train_labels)
        if count == 0:
            prediction = grid_search.best_estimator_.predict(test_set)
            matrix = confusion_matrix(test_labels, prediction, labels=[1, 0])
        else:
            prediction_second_input = grid_search.best_estimator_.predict(
                test_set)
            matrix = confusion_matrix(test_labels, prediction_second_input,
                                      labels=[1, 0])
        count += 1
        print(matrix)

    # Collect the samples that both models misclassify in the same way
    for i in range(len(test_labels)):
        if (test_labels[i] != prediction[i]) and (
                prediction[i] == prediction_second_input[i]):
            if test_labels[i] == 0:
                no_as_yes.append(test_data[i])
            else:
                yes_as_no.append(test_data[i])

    print("Overlapping NO as YES:")
    l1 = len(no_as_yes)
    print("Number: ", l1)
    for i in range(l1):
        print(no_as_yes[i])

    print("Overlapping YES as NO:")
    l2 = len(yes_as_no)
    print("Number: ", l2)
    for i in range(l2):
        print(yes_as_no[i])
from scikitTSVM import SKTSVM
import warnings

warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

tsvm = SKTSVM(probability=False, C=0.01, gamma=1.0, kernel='linear', lamU=1.0)

percent_test = 0.15
positive_set = 'data/bc_samples.txt'
negative_set = 'data/bc_grounds.txt'
unlabeled_set = 'data/unlabeled-data.csv'

analogy_list = functions.get_list_re(positive_set)
non_analogy_list = functions.get_list_re(negative_set)
unlabeled_list = functions.get_list_re(unlabeled_set)

samples = [(text, 1) for text in analogy_list] + \
          [(text, 0) for text in non_analogy_list]
train_data, train_labels, test_data, test_labels = functions.preprocess(
    samples, percent_test)

# Add up to ~20k unlabeled samples, marked with label -1
j = 0
for sample in unlabeled_list:
    if j <= 20000:
        train_data.append(sample)
        train_labels.append(-1)
    j += 1

train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
train_data = np.array(train_data)

TfidfVect = TfidfVectorizer(tokenizer=lambda doc: doc, lowercase=False)
train_set = TfidfVect.fit_transform(train_data).toarray()
test_set = TfidfVect.transform(test_data).toarray()

# Label Propagation (section commented out in the original)
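# Hedged sketch of the TSVM training step this script sets up (an assumption:
# SKTSVM follows the scikit-learn fit/predict convention, with -1 marking the
# unlabeled rows appended above).
from sklearn.metrics import confusion_matrix

tsvm.fit(train_set, train_labels)
prediction = tsvm.predict(test_set)
print(confusion_matrix(test_labels, prediction, labels=[1, 0]))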
# A few ways to create the pandas DataFrame: from sqlite, from a local CSV
# path, or from GitHub.

# Sqlite option
# songs_df = pd.read_sql_table('songs', 'sqlite:///db.sqlite3')

# Local path option
# songs_df = pd.read_csv('../Data/SpotifyAudioFeaturesApril2019_duplicates_removed.csv')

# GitHub option
infile = "https://raw.githubusercontent.com/spotify-recommendation-engine-3/data_science/master/Data/SpotifyAudioFeaturesApril2019_duplicates_removed.csv"
songs_df = pd.read_csv(infile)

# First three columns are track identifiers; the rest are audio features
y = songs_df[songs_df.columns[:3]]
X = songs_df[songs_df.columns[3:]]

my_model = create_model(preprocess(X))


@app.route('/', methods=['GET', 'POST'])
def plot_png():
    fig = create_figure()
    output = io.BytesIO()
    FigureCanvas(fig).print_png(output)
    return Response(output.getvalue(), mimetype='image/png')


def create_figure():
    # Pick a random song and keep only its audio-feature columns
    song_df = songs_df.sample()
    song_df = song_df.iloc[:, 3:]
    songs_to_plot = suggest_songs(song_df, songs_df, y, my_model)
    fig = Figure(figsize=(9, 9), edgecolor='gray')