예제 #1
0
    def plot_crimes_by_coordinates(category="DRUNKENNESS", csv_path='train.csv'):
        """Scatter-plot the coordinates of every record of one crime category.

        Rows are read through ``DataDAO.get_data_from_csv``. A row is kept when
        its second field equals ``category`` and the point returned by
        ``DataDAO.get_coordinates_tr`` lies strictly inside the bounding box
        defined below. The figure is displayed with ``plt.show()``; nothing is
        returned.

        Args:
            category: Value compared against field 1 of each row. Defaults to
                "DRUNKENNESS", the value that used to be hard-coded here.
            csv_path: Path handed to ``DataDAO.get_data_from_csv``. Defaults to
                'train.csv', the value that used to be hard-coded here.
        """
        # Bounding box used to discard points outside the area of interest.
        # NOTE(review): the values look like San Francisco longitude/latitude
        # limits - confirm against the data set.
        x_min, x_max = -122.6, -122.35
        y_min, y_max = 35, 40

        xs = []
        ys = []
        for row in DataDAO.get_data_from_csv(csv_path):
            # Skip rows whose first field is 'Dates' (presumably the CSV header)
            # and rows that belong to another category.
            if row[0] == 'Dates' or row[1] != category:
                continue
            point = DataDAO.get_coordinates_tr(row)
            if x_min < point[0] < x_max and y_min < point[1] < y_max:
                xs.append(point[0])
                ys.append(point[1])

        # One small marker per record.
        plt.scatter(xs, ys, s=1)
        plt.show()
    def get_test_data(limit=0):
        """Collect the vectors from ``DataDAO.get_test_vector`` into a list.

        Args:
            limit: Maximum number of vectors to keep; 0 (the default) keeps
                every vector.

        Returns:
            A list holding the vectors in the order they were produced.
        """
        vectors = DataDAO.get_test_vector()
        if limit == 0:
            # No cap requested: take everything.
            return list(vectors)

        rows = []
        for position, vector in enumerate(vectors):
            if position >= limit:
                break
            rows.append(vector)
        return rows
    def get_test_data(limit=0):
        """Collect the vectors from ``DataDAO.get_test_vector`` into a list.

        Args:
            limit: Maximum number of vectors to keep; 0 (the default) keeps
                every vector.

        Returns:
            A list holding the vectors in the order they were produced.
        """
        vectors = DataDAO.get_test_vector()
        if limit == 0:
            # No cap requested: take everything.
            return list(vectors)

        rows = []
        for position, vector in enumerate(vectors):
            if position >= limit:
                break
            rows.append(vector)
        return rows
    def build_sparse_matrix_target(limit=0):
        """Encode the training targets and return them as a sparse matrix.

        Each item from ``DataDAO.get_targets`` is converted with
        ``TrainingFactory.get_category_vector`` and the resulting rows are
        wrapped in a ``csr_matrix``.

        Args:
            limit: Maximum number of targets to encode; 0 (the default)
                encodes all of them.

        Returns:
            A ``csr_matrix`` with one row per encoded target.
        """
        capped = limit != 0
        encoded_rows = []
        for position, label in enumerate(DataDAO.get_targets()):
            if capped and position >= limit:
                break
            encoded_rows.append(TrainingFactory.get_category_vector(label))

        return csr_matrix(encoded_rows)
    def build_target_vector_by_category(category, limit=0):
        """Build a one-vs-rest 0/1 target list for a single category.

        Args:
            category: Value each target from ``DataDAO.get_targets`` is
                compared against.
            limit: Maximum number of targets to process; 0 (the default)
                processes all of them.

        Returns:
            A list with 1 where the target equals ``category`` and 0 elsewhere.
        """
        capped = limit != 0
        flags = []
        for position, label in enumerate(DataDAO.get_targets()):
            if capped and position >= limit:
                break
            flags.append(1 if label == category else 0)

        return flags
    def build_sparse_matrix_input(limit=0):
        """Return the scaled training vectors as a sparse matrix.

        The vectors from ``DataDAO.get_train_vector`` are gathered into a list,
        passed through ``scale(..., axis=0)`` (presumably
        ``sklearn.preprocessing.scale`` - confirm against the file's imports)
        and wrapped in a ``csr_matrix``.

        Args:
            limit: Maximum number of vectors to use; 0 (the default) uses all
                of them.

        Returns:
            A ``csr_matrix`` built from the scaled data.
        """
        vectors = DataDAO.get_train_vector()
        if limit == 0:
            rows = list(vectors)
        else:
            rows = []
            for position, vector in enumerate(vectors):
                if position >= limit:
                    break
                rows.append(vector)

        return csr_matrix(scale(rows, axis=0))
    def build_sparse_matrix_input(limit=0):
        """Return the scaled training vectors as a sparse matrix.

        The vectors from ``DataDAO.get_train_vector`` are gathered into a list,
        passed through ``scale`` with its default arguments (presumably
        ``sklearn.preprocessing.scale`` - confirm against the file's imports)
        and wrapped in a ``csr_matrix``.

        Args:
            limit: Maximum number of vectors to use; 0 (the default) uses all
                of them.

        Returns:
            A ``csr_matrix`` built from the scaled data.
        """
        vectors = DataDAO.get_train_vector()
        if limit == 0:
            rows = list(vectors)
        else:
            rows = []
            for position, vector in enumerate(vectors):
                if position >= limit:
                    break
                rows.append(vector)

        # Standardise the data before converting it to CSR form.
        standardised = scale(rows)
        return csr_matrix(standardised)
예제 #8
0
    def plot_crime_category_by_date_time(category):
        """Show three stacked histograms of the time features of one category.

        The features come from ``DataDAO.get_time_features_by_category`` and
        are indexed as a 2-D array: column 0, column 1 and column 2 feed the
        "month", "hour" and "day of week" panels respectively.

        Args:
            category: Category passed to the DAO; also used as figure title.

        Returns:
            The ``plt`` module object, after ``plt.show()`` has been called.
        """
        features = DataDAO.get_time_features_by_category(category)

        # Names follow the original variables (day of year, second of day,
        # day of week); the actual column semantics are defined by the DAO.
        year_day = features[:, 0]
        day_second = features[:, 1]
        week_day = features[:, 2]

        # (values, number of bins, panel title), listed top to bottom.
        panels = (
            (week_day, 7, "Crime by day of week"),
            (year_day, 12, "Crime by month"),
            (day_second, 24, "Crime by hour"),
        )

        plt.figure()
        plt.suptitle(category)
        for slot, (values, bin_count, title) in enumerate(panels, start=1):
            plt.subplot(3, 1, slot)
            plt.hist(values, bins=bin_count)
            plt.title(title)

        plt.show()
        return plt
    def get_training_data_by_category(category, limit=0):
        """Assemble a scaled one-vs-rest training set for ``category``.

        Positive samples come from ``DataDAO.get_training_data_by_category``
        and are labelled 1; negative samples come from
        ``DataDAO.get_training_data_by_other_categories`` and are labelled 0.
        The combined samples are scaled with a freshly fitted
        ``MinMaxScaler`` and converted to a ``csr_matrix``.

        Args:
            category: Category treated as the positive class.
            limit: Approximate total sample budget, split 20% positive /
                80% negative. 0 (the default) disables both caps.

        Returns:
            A tuple ``(data, targets, scaler)``: the scaled samples as a
            ``csr_matrix``, the list of 0/1 labels, and the fitted scaler.
        """
        positive_cap = limit * 0.2
        negative_cap = limit * 0.8
        available_positives = DataDAO.count_training_data_by_category(category)
        if available_positives < positive_cap:
            # Fewer positives than the budget allows: shrink both caps.
            # NOTE(review): the factor 5 gives a 1:5 ratio, not the 1:4 of the
            # 20/80 split above - confirm this is intended.
            positive_cap = available_positives
            negative_cap = available_positives * 5

        capped = limit != 0
        samples = []
        labels = []

        def _take(stream, cap, label):
            # Append items from stream with the given label, honouring the cap.
            for position, sample in enumerate(stream):
                if capped and position >= cap:
                    break
                samples.append(sample)
                labels.append(label)

        _take(DataDAO.get_training_data_by_category(category), positive_cap, 1)
        _take(DataDAO.get_training_data_by_other_categories(category), negative_cap, 0)

        scaler = MinMaxScaler()
        scaled_samples = scaler.fit_transform(samples)

        return csr_matrix(scaled_samples), labels, scaler