def plot_crimes_by_coordinates():
    """Scatter-plot the coordinates of "DRUNKENNESS" rows read from train.csv.

    Rows are read through ``DataDAO.get_data_from_csv('train.csv')``. A row is
    used only when its first field is not the literal ``'Dates'`` (presumably
    the CSV header row -- confirm) and its second field equals the category.
    Coordinates come from ``DataDAO.get_coordinates_tr`` and are kept only
    when they lie strictly inside the hard-coded bounding box below.

    The function returns nothing; it draws the scatter plot and calls
    ``plt.show()``.
    """
    wanted_category = "DRUNKENNESS"
    rows = DataDAO.get_data_from_csv('train.csv')

    # Bounding box (exclusive on both ends) used to discard outlying points.
    x_low, x_high = -122.6, -122.35
    y_low, y_high = 35, 40

    xs = []
    ys = []
    for row in rows:
        if raw_is_header(row) if False else row[0] == 'Dates':
            continue
        if row[1] != wanted_category:
            continue
        point = DataDAO.get_coordinates_tr(row)
        if not (x_low < point[0] < x_high):
            continue
        if not (y_low < point[1] < y_high):
            continue
        xs.append(point[0])
        ys.append(point[1])

    plt.scatter(xs, ys, s=1)
    plt.show()
def get_test_data(limit=0):
    """Collect the vectors yielded by ``DataDAO.get_test_vector()`` into a list.

    Args:
        limit: When 0 (the default) every vector is collected. Any other
            value stops the collection at the first index that is
            ``>= limit``.

    Returns:
        A plain list of the collected vectors, in iteration order.
    """
    capped = limit != 0
    rows = []
    for position, row in enumerate(DataDAO.get_test_vector()):
        if capped and position >= limit:
            break
        rows.append(row)
    return rows
def build_sparse_matrix_target(limit=0):
    """Build a CSR matrix with one category vector per training target.

    Each item from ``DataDAO.get_targets()`` is converted with
    ``TrainingFactory.get_category_vector`` and the resulting rows are wrapped
    in a ``csr_matrix``.

    Args:
        limit: When 0 (the default) every target is used. Any other value
            stops at the first index that is ``>= limit``.

    Returns:
        The ``csr_matrix`` built from the collected category vectors.
    """
    capped = limit != 0
    rows = []
    for position, target in enumerate(DataDAO.get_targets()):
        if capped and position >= limit:
            break
        rows.append(TrainingFactory.get_category_vector(target))
    return csr_matrix(rows)
def build_target_vector_by_category(category, limit=0):
    """Build a 0/1 label list marking which targets equal ``category``.

    Args:
        category: Value each item of ``DataDAO.get_targets()`` is compared
            against with ``==``.
        limit: When 0 (the default) every target is used. Any other value
            stops at the first index that is ``>= limit``.

    Returns:
        A list with 1 where the target equals ``category`` and 0 elsewhere.
    """
    capped = limit != 0
    labels = []
    for position, target in enumerate(DataDAO.get_targets()):
        if capped and position >= limit:
            break
        labels.append(1 if target == category else 0)
    return labels
def build_sparse_matrix_input(limit=0):
    """Build a scaled CSR matrix from the training vectors.

    Vectors from ``DataDAO.get_train_vector()`` are collected, passed through
    ``scale(..., axis=0)`` and wrapped in a ``csr_matrix``.

    NOTE(review): a second function with this same name is defined right
    after this one; if both live in the same scope, the later definition
    replaces this one -- confirm which is meant to be kept.

    Args:
        limit: When 0 (the default) every vector is used. Any other value
            stops at the first index that is ``>= limit``.

    Returns:
        The ``csr_matrix`` built from the scaled rows.
    """
    capped = limit != 0
    rows = []
    for position, row in enumerate(DataDAO.get_train_vector()):
        if capped and position >= limit:
            break
        rows.append(row)
    return csr_matrix(scale(rows, axis=0))
def build_sparse_matrix_input(limit=0):
    """Build a scaled CSR matrix from the training vectors.

    Vectors from ``DataDAO.get_train_vector()`` are collected, passed through
    ``scale`` (per the original comment: centering the data to zero mean and
    unit standard deviation) and wrapped in a ``csr_matrix``.

    NOTE(review): a function with this same name is also defined immediately
    before this one; if both live in the same scope, this later definition is
    the one that takes effect -- confirm the duplicate is intentional.

    Args:
        limit: When 0 (the default) every vector is used. Any other value
            stops at the first index that is ``>= limit``.

    Returns:
        The ``csr_matrix`` built from the scaled rows.
    """
    capped = limit != 0
    rows = []
    for position, row in enumerate(DataDAO.get_train_vector()):
        if capped and position >= limit:
            break
        rows.append(row)
    standardized = scale(rows)
    return csr_matrix(standardized)
def plot_crime_category_by_date_time(category):
    """Show three stacked histograms of the time features for ``category``.

    The features come from ``DataDAO.get_time_features_by_category`` and are
    indexed as a 2-D array: column 0 is plotted with 12 bins ("Crime by
    month"), column 1 with 24 bins ("Crime by hour") and column 2 with 7 bins
    ("Crime by day of week"). Presumably these columns hold day-of-year,
    second-of-day and weekday respectively -- confirm against the DAO.

    Args:
        category: Passed to the DAO and used as the figure's super-title.

    Returns:
        The ``plt`` object, after ``plt.show()`` has been called.
    """
    features = DataDAO.get_time_features_by_category(category)

    # Slice every column up front so a malformed array fails before any
    # figure is created.
    column_0 = features[:, 0]
    column_1 = features[:, 1]
    column_2 = features[:, 2]

    # (values, number of bins, title), listed top to bottom.
    panels = (
        (column_2, 7, "Crime by day of week"),
        (column_0, 12, "Crime by month"),
        (column_1, 24, "Crime by hour"),
    )

    plt.figure()
    plt.suptitle(category)
    for slot, (values, bin_count, title) in enumerate(panels, start=1):
        plt.subplot(3, 1, slot)
        plt.hist(values, bins=bin_count)
        plt.title(title)
    plt.show()
    return plt
def get_training_data_by_category(category, limit=0):
    """Assemble a min-max scaled one-vs-rest training set for ``category``.

    Positive samples (label 1) come from
    ``DataDAO.get_training_data_by_category`` and negative samples (label 0)
    from ``DataDAO.get_training_data_by_other_categories``. The combined rows
    are scaled with a freshly fitted ``MinMaxScaler`` and returned as a
    ``csr_matrix``.

    Args:
        category: Category treated as the positive class.
        limit: When 0 (the default) all samples from both sources are used.
            Otherwise positives stop at the first index ``>= limit * 0.2`` and
            negatives at the first index ``>= limit * 0.8``. If the DAO
            reports fewer positives than ``limit * 0.2``, the caps become that
            count for positives and five times that count for negatives.

    Returns:
        A tuple ``(sparse_data, labels, scaler)``: the scaled samples as a
        ``csr_matrix``, the list of 0/1 labels in the same order, and the
        fitted ``MinMaxScaler``.

    NOTE(review): the fallback negative cap of 5x the positive count yields a
    1:5 mix rather than the 1:4 implied by the 0.2/0.8 split -- confirm this
    is intended.
    """
    capped = limit != 0
    positive_cap = limit * 0.2
    negative_cap = limit * 0.8

    positives_available = DataDAO.count_training_data_by_category(category)
    if positives_available < positive_cap:
        positive_cap = positives_available
        negative_cap = positives_available * 5

    samples = []
    labels = []
    # Each loader is only called when its turn comes, so the negative source
    # is opened after the positive one has been consumed.
    sources = (
        (DataDAO.get_training_data_by_category, 1, positive_cap),
        (DataDAO.get_training_data_by_other_categories, 0, negative_cap),
    )
    for load, label, cap in sources:
        for position, sample in enumerate(load(category)):
            if capped and position >= cap:
                break
            samples.append(sample)
            labels.append(label)

    scaler = MinMaxScaler()
    scaled_samples = scaler.fit_transform(samples)
    sparse_samples = csr_matrix(scaled_samples)
    return sparse_samples, labels, scaler