def astar(maze, start, goal):
    pq = PriorityQueue()
    pq.put(start, 0)
    predecessors = {start: None}
    g_values = {start: 0}
    while not pq.is_empty():
        current_cell = pq.get()
        if current_cell == goal:
            return get_path(predecessors, start, goal)
        for direction in ["up", "right", "down", "left"]:
            row_offset, col_offset = offsets[direction]
            neighbor = (current_cell[0] + row_offset, current_cell[1] + col_offset)
            if is_legal_pos(maze, neighbor) and neighbor not in g_values:
                # f = cost so far (g) plus heuristic estimate to the goal
                g_values[neighbor] = g_values[current_cell] + 1
                f_value = g_values[neighbor] + heuristic(neighbor, goal)
                pq.put(neighbor, f_value)
                predecessors[neighbor] = current_cell
    return None
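# The PriorityQueue used above is not the standard-library class (its put()
# takes an explicit priority). A minimal heapq-backed sketch of the assumed
# put/get/is_empty interface could look like this; the counter tie-breaker is
# an assumption so that equal-priority items come out in FIFO order.
import heapq
import itertools

class PriorityQueue:
    def __init__(self):
        self._heap = []
        self._counter = itertools.count()  # tie-breaker for equal priorities

    def put(self, item, priority):
        heapq.heappush(self._heap, (priority, next(self._counter), item))

    def get(self):
        # drop the priority and counter, return only the item
        return heapq.heappop(self._heap)[-1]

    def is_empty(self):
        return not self._heap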
from flask import request, flash, render_template
from werkzeug.utils import secure_filename

def uploader():
    if request.method == 'POST':
        file = request.files['file']
        original = file.filename
        ext = get_extension(file.filename)
        # replace the user-supplied name with a generated one before saving
        file.filename = generate_filename()
        file.save(get_path(secure_filename(file.filename) + ext))
        flash("File: '{}' uploaded".format(original))
    return render_template('index.html')
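# get_extension and generate_filename are project helpers not shown here; a
# minimal sketch of what they plausibly do (assumptions: the extension is taken
# from the original upload, and names are random uuid4 hex strings so uploads
# cannot collide or overwrite each other):
import os
import uuid

def get_extension(filename):
    # '.png' from 'cat.png'; empty string if there is no extension
    return os.path.splitext(filename)[1]

def generate_filename():
    return uuid.uuid4().hex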
def bfs(maze, start, goal):
    q = Queue()
    q.enqueue(start)
    predecessors = {start: None}
    while not q.is_empty():
        current_cell = q.dequeue()
        if current_cell == goal:
            return get_path(predecessors, start, goal)
        for direction in ["up", "right", "down", "left"]:
            row_offset, col_offset = offsets[direction]
            neighbor = (current_cell[0] + row_offset, current_cell[1] + col_offset)
            if is_legal_pos(maze, neighbor) and neighbor not in predecessors:
                q.enqueue(neighbor)
                predecessors[neighbor] = current_cell
    return None
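# Queue here is a course-style wrapper rather than queue.Queue (note the
# enqueue/dequeue/is_empty names). A minimal deque-backed sketch of the
# assumed interface:
from collections import deque

class Queue:
    def __init__(self):
        self._items = deque()

    def enqueue(self, item):
        self._items.append(item)

    def dequeue(self):
        return self._items.popleft()

    def is_empty(self):
        return not self._items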
def dfs(maze, start, goal):
    stack = Stack()
    stack.push(start)
    predecessors = {start: None}
    while not stack.is_empty():
        current_cell = stack.pop()
        if current_cell == goal:
            return get_path(predecessors, start, goal)
        for direction in ["up", "right", "down", "left"]:
            row_offset, col_offset = offsets[direction]
            neighbor = (current_cell[0] + row_offset, current_cell[1] + col_offset)
            if is_legal_pos(maze, neighbor) and neighbor not in predecessors:
                stack.push(neighbor)
                predecessors[neighbor] = current_cell
    return None
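# dfs/bfs/astar all rely on a Stack class plus the shared helpers offsets,
# is_legal_pos, heuristic, and get_path, none of which appear above. The
# sketches below are assumptions consistent with how they are called: cells
# are (row, col) tuples and maze is a grid of rows with "*" marking a wall.

class Stack:
    def __init__(self):
        self._items = []

    def push(self, item):
        self._items.append(item)

    def pop(self):
        return self._items.pop()

    def is_empty(self):
        return not self._items

offsets = {
    "up": (-1, 0),
    "right": (0, 1),
    "down": (1, 0),
    "left": (0, -1),
}

def is_legal_pos(maze, pos):
    i, j = pos
    return 0 <= i < len(maze) and 0 <= j < len(maze[0]) and maze[i][j] != "*"

def heuristic(a, b):
    # Manhattan distance: admissible for 4-directional grid movement
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def get_path(predecessors, start, goal):
    # walk backwards from goal to start via the predecessor map, then reverse
    current = goal
    path = []
    while current is not None:
        path.append(current)
        current = predecessors[current]
    path.reverse()
    return path

# Tiny usage example on a 3x4 maze (0 = open cell, "*" = wall):
# maze = [[0, 0, 0, 0], [0, "*", "*", 0], [0, 0, 0, 0]]
# print(bfs(maze, (0, 0), (2, 3)))  # shortest path as a list of cells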
    .reduceByKey(lambda x, y: x + y).collect()
topics = sorted(counts, reverse=True, key=lambda x: x[1])

# train/test split for the top-N most frequent topics
for topicN in [5, 10, 15, 20]:
    selected = [t[0] for t in topics[:topicN]]
    filtered = group_rdd.filter(lambda x: x[0] in selected)
    descriptions = filtered.map(lambda x: x[1]).collect()
    labels = filtered.map(lambda x: x[0]).collect()

    # map topic names to ids
    mapping = load_topic_mapping()
    labels = [mapping[l] for l in labels]

    # train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        descriptions, labels, test_size=0.2, random_state=42
    )
    train = (X_train, y_train)
    test = (X_test, y_test)

    # file paths
    data_path = get_path('data', 'documents')
    train_file = os.path.join(data_path, f'train_{topicN:02d}')
    test_file = os.path.join(data_path, f'test_{topicN:02d}')

    # save the splits
    with open(train_file, 'wb') as f:
        pickle.dump(train, f)
    with open(test_file, 'wb') as f:
        pickle.dump(test, f)
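# load_topic_mapping is a project helper not shown here; a plausible sketch,
# assuming the name-to-id mapping lives in a JSON file (the file name
# 'topic_mapping.json' is hypothetical):
import json

def load_topic_mapping():
    with open(get_path('data', 'topic_mapping.json'), 'r') as f:
        return json.load(f)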
def load_file(dir_name, file_name):
    file = get_path('results', dir_name, file_name)
    with open(file, 'rb') as f:
        return pickle.load(f)
def save_file(to_save, dir_name, file_name):
    file = get_path('results', dir_name, file_name)
    with open(file, 'wb') as f:
        pickle.dump(to_save, f)
def load_dataset(data_type, topic_num):
    file = get_path('data', 'documents', f'{data_type}_{topic_num:02d}')
    with open(file, 'rb') as f:
        return pickle.load(f)
def load_model_info():
    file = get_path('data', 'models_info.json')
    with open(file, 'r') as f:
        return json.load(f)
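# get_path itself (imported from helper elsewhere in the project) is not shown
# in these files; a minimal sketch, assuming it simply joins path parts onto a
# project root, would be:
import os

PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))  # assumed root

def get_path(*parts):
    return os.path.join(PROJECT_ROOT, *parts)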
Ruosi Wang [email protected]
"""
from helper import load_cities, get_path
from spark_data import make_DF

import os
import numpy as np
from datetime import datetime
from pytz import timezone

from pyspark import SparkContext
from pyspark.sql import types
import pyspark.sql.functions as F
from pyspark.sql.functions import col, udf

occurrence_path = get_path('results', 'EDA', 'occurrence')
venues_path = get_path('results', 'EDA', 'venues')


# ------------------------------------------------- #
# helper functions for transforming datetime
def floor_req(val):
    """Return 1 if the value meets the threshold of 2, else 0."""
    return 1 if val >= 2 else 0


def timezone_convert(dt, tz):
    """Convert a datetime into the given timezone (e.g. 'US/Eastern')."""
    return dt.astimezone(timezone(tz))
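# Hypothetical usage sketch: the udf/types/col imports above suggest these
# helpers get registered as Spark UDFs. The DataFrame df and the column names
# ('checkin_time', 'tz', 'visits') are assumptions, not the actual schema:
#
# floor_req_udf = udf(floor_req, types.IntegerType())
# tz_convert_udf = udf(timezone_convert, types.TimestampType())
# df = (df
#       .withColumn('local_time', tz_convert_udf(col('checkin_time'), col('tz')))
#       .withColumn('frequent', floor_req_udf(col('visits'))))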
import pickle
import os
import time

import numpy as np
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC

from helper import get_path

models_path = get_path('results', 'models')


def build_SKM(model_type=None, max_features=None, selectK=None, params=None):
    if not model_type:
        raise ValueError('model_type is not defined')
    params = params or {}  # avoid a mutable default argument

    # Multinomial Naive Bayes
    if model_type == 'MNB':
        alpha = params.get('alpha', .01)
        pipe = Pipeline([('MNB', MultinomialNB(alpha=alpha))])

    # Support Vector Machine (linear kernel)
    if model_type == 'SVC':
        pipe = Pipeline([('SVC', SVC(kernel='linear'))])

    # Extra Trees Classifier
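# Hypothetical usage sketch. build_SKM is truncated above; this assumes it
# ultimately returns a Pipeline whose leading stages vectorize raw text, which
# the TfidfVectorizer/SelectKBest imports and the max_features/selectK
# parameters suggest (the parameter values below are illustrative only):
#
# X_train, y_train = load_dataset('train', 10)
# X_test, y_test = load_dataset('test', 10)
# pipe = build_SKM(model_type='MNB', max_features=20000, selectK=5000)
# pipe.fit(X_train, y_train)
# preds = pipe.predict(X_test)
# print(accuracy_score(y_test, preds), f1_score(y_test, preds, average='macro'))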