Python computeClustersAndOrganizeData Examples, prediction.fwdfiles.cluster_functions.computeClustersAndOrganizeData Python Examples

Example #1

0

Show file

File: views.py Project: kathleen-xue/crime

def cluster(request, dataset, gridshape, threshold):
    """Cluster one of the bundled datasets and return cluster borders and counts.

    Args:
        request: Incoming HTTP request; not read by this view.
        dataset: ``'dps'`` or ``'la'``; selects which pickled dataset is loaded.
        gridshape: Forwarded unchanged to ``computeClustersAndOrganizeData``
            and ``getBorderCordinates``.
        threshold: Forwarded unchanged to ``computeClustersAndOrganizeData``.

    Returns:
        HttpResponse: status 200 whose body is the JSON array
        ``[border_result, crime_counts]`` (one entry per cluster geometry);
        status 422 when ``dataset`` is not recognised; status 400 when
        clustering, border extraction or serialization raises.
    """
    import logging

    # Both datasets currently use the same bounding box.
    lon_min = -118.301895
    lon_max = -118.27
    lat_min = 34.015
    lat_max = 34.0366

    # The pickle paths are fixed, not derived from the request.
    if dataset == 'dps':
        data = pd.read_pickle(os.path.abspath(
            './prediction/dataset/DPSUSC.pkl'))
        ignoreFirst = 225
    elif dataset == 'la':
        data = pd.read_pickle(os.path.abspath(
            './prediction/dataset/LAdata.pkl'))
        ignoreFirst = 104
    else:
        response = HttpResponse(
            'Wrong dataset. Should choose from \'dps\' or \'la\'')
        response.status_code = 422
        return response

    try:
        clusters, realCrimes = computeClustersAndOrganizeData(
            data, gridshape, ignoreFirst, threshold, 1)
        border_result = []
        crime_counts = []
        for geometry in clusters.Geometry:
            num_of_crimes = 0
            border_set = set()
            for i, row in enumerate(geometry.toarray()):
                for j, cell in enumerate(row):
                    if cell == 0:
                        continue
                    borders = getBorderCordinates(
                        lon_max, lon_min, lat_max, lat_min, gridshape, i, j)
                    for border in borders:
                        # Toggle membership: a border produced an even number
                        # of times is dropped, one produced an odd number of
                        # times is kept.
                        if border in border_set:
                            border_set.remove(border)
                        else:
                            border_set.add(border)
                    num_of_crimes += cell
            border_result.append(list(border_set))
            crime_counts.append(num_of_crimes)
        response = HttpResponse(pd.io.json.dumps(
            [border_result, crime_counts]))
        response.status_code = 200
        return response
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # SystemExit/KeyboardInterrupt still propagate, and keep the traceback
        # in the log instead of discarding it.
        logging.getLogger(__name__).exception(
            'cluster failed for dataset=%s gridshape=%s threshold=%s',
            dataset, gridshape, threshold)
        message = (
            'Internal server error with the following inputs:\n'
            'dataset: {}, gridshape: {}, threshold: {}'
        ).format(dataset, gridshape, threshold)
        response = HttpResponse(message)
        # NOTE(review): status stays 400 for existing clients even though the
        # message says "internal server error"; 500 may be more appropriate.
        response.status_code = 400
        return response

Example #2

0

Show file

File: views.py Project: kathleen-xue/crime

def cluster2(request):
    """Cluster the features posted in the request body.

    The JSON body must provide ``features``, ``threshold`` and ``gridShape``
    (a Python literal parsed with ``literal_eval``). Features whose year is 0
    or whose coordinates fall outside the hard-coded bounding box are skipped.

    Returns:
        HttpResponse: body is the JSON array ``[border_result, crime_counts]``
        with one entry per cluster geometry.
    """
    print("CLUSTERING")
    pd.options.display.precision = 10
    payload = json.loads(request.body.decode('utf-8'))
    features = payload['features']
    threshold = int(payload['threshold'])
    gridshape = literal_eval(payload['gridShape'])

    lon_min, lon_max = -118.297, -118.27
    lat_min, lat_max = 34.015, 34.038

    print(len(features))
    rows = []
    for feature in features:
        props = feature['properties']
        date_part = props['time'].split('T')[0]
        year, month, day = map(int, date_part.split('-'))
        if year == 0:
            continue
        # Latitude is parsed and checked before longitude is touched, so an
        # out-of-range latitude skips the feature without parsing longitude.
        latitude = float(props['latitude'])
        if not (lat_min <= latitude <= lat_max):
            continue
        longitude = float(props['longitude'])
        if not (lon_min <= longitude <= lon_max):
            continue
        rows.append([props['Category'], latitude, longitude,
                     datetime.date(year, month, day)])
    print("DONE PREPROCESSING")

    frame = pd.DataFrame(
        np.array(rows), columns=['Category', 'Latitude', 'Longitude', "Date"])
    data = frame.sort_values(
        ['Latitude', 'Longitude', 'Date']).reset_index(drop=True)

    ignoreFirst = 225
    clusters, realCrimes = computeClustersAndOrganizeData(
        data, gridshape, ignoreFirst, threshold, 1)

    border_result = []
    crime_counts = []
    for geometry in clusters.Geometry:
        total = 0
        outline = set()
        for i, cells in enumerate(geometry.toarray()):
            for j, value in enumerate(cells):
                if value == 0:
                    continue
                for edge in getBorderCordinates(
                        lon_max, lon_min, lat_max, lat_min, gridshape, i, j):
                    # Symmetric difference with a singleton toggles membership.
                    outline ^= {edge}
                total += value
        border_result.append(list(outline))
        crime_counts.append(total)

    serialized = pd.io.json.dumps([border_result, crime_counts])
    print(serialized)
    return HttpResponse(serialized)

Example #3

0

Show file

def heterogeneousCluster(request):
    """Cluster posted features and return an ordered border path per cluster.

    The JSON body must provide ``features``, ``threshold`` and ``gridShape``
    (a Python literal parsed with ``literal_eval``). Features are converted
    with ``convertFromFeaturesToData`` before clustering.

    Returns:
        HttpResponse: body is the JSON array
        ``[border_result, crime_counts, clusters, realCrimes]``; the response
        carries ``Access-Control-Allow-Origin: *``. ``border_result`` holds
        one node path per cluster geometry, filled in by
        ``fleuryEulerianCircuit``; a geometry with no non-zero cell yields an
        empty path.
    """
    pd.options.display.precision = 10
    body_unicode = request.body.decode('utf-8')
    body = json.loads(body_unicode)
    features = body['features']
    threshold = int(body['threshold'])
    gridshape = literal_eval(body['gridShape'])
    data = convertFromFeaturesToData(features)
    ignoreFirst = 225
    clusters, realCrimes = computeClustersAndOrganizeData(
        data, gridshape, ignoreFirst, threshold, 1)
    border_result = []
    crime_counts = []
    for geometry in clusters.Geometry:
        num_of_crimes = 0
        border_set = set()
        for i, row in enumerate(geometry.toarray()):
            for j, cell in enumerate(row):
                if cell == 0:
                    continue
                # NOTE(review): lon_max/lon_min/lat_max/lat_min are not
                # defined in this function; presumably module-level
                # constants -- confirm.
                borders = getBorderCordinates(lon_max, lon_min, lat_max,
                                              lat_min, gridshape, i, j)
                for border in borders:
                    # Toggle membership: a border seen an even number of
                    # times is dropped.
                    if border in border_set:
                        border_set.remove(border)
                    else:
                        border_set.add(border)
                num_of_crimes += cell
        node_path = []
        # Guard the empty case: next(iter(set())) would raise StopIteration
        # and abort the whole request for a geometry with no borders.
        if border_set:
            neighbor_map = {}
            for border in border_set:
                # Each border is split into its first two and remaining
                # elements, which are passed to addEdge as the two endpoints.
                addEdge(neighbor_map, border[:2], border[2:])
            print('Start Finding Eulerian path...')
            start_point = next(iter(border_set))[:2]
            fleuryEulerianCircuit(neighbor_map, start_point, node_path)
        border_result.append(node_path)
        crime_counts.append(num_of_crimes)
    resp = HttpResponse(
        pd.io.json.dumps([border_result, crime_counts, clusters, realCrimes]))
    resp['Access-Control-Allow-Origin'] = '*'
    return resp

Example #4

0

Show file

def evaluate(request):
    """Run forecasts for every requested configuration and return a plot.

    The JSON body must provide ``periodsAhead``, ``features``, ``thresholds``,
    ``gridshapes``, ``methods`` and ``resource_indexes``. For each
    (gridshape, threshold, method) combination the matching forecast function
    is called (``forecast_LSTM`` for ``"LSTM"``, ``forecast_ARIMA`` for
    ``"ARIMA"``/``"AR"``, ``forecast_MM`` otherwise), followed by
    ``compute_resource_allocation``. A figure with one subplot per method is
    then drawn by ``plot_resource_allocation`` and saved under
    ``results/plots``.

    Returns:
        HttpResponse: status 200 whose body is the JSON-serialized base64
        encoding of the saved PNG; carries ``Access-Control-Allow-Origin: *``.
    """
    maxDist = 1
    ignoreFirst = 225
    isModelEvaluation = True
    body_unicode = request.body.decode('utf-8')
    body = json.loads(body_unicode)
    periodsAhead_list = [int(body['periodsAhead'])]
    isRetrainingModel = True
    features = body['features']
    data = convertFromFeaturesToData(features)
    thresholds = body['thresholds']
    gridshapes = [tuple(gridshape) for gridshape in body['gridshapes']]
    methods = body['methods']
    resource_indexes = body['resource_indexes']
    for gridshape in gridshapes:
        for threshold in thresholds:
            clusters, realCrimes = computeClustersAndOrganizeData(
                data, gridshape, ignoreFirst, threshold, 1)
            clusters = clusters.to_dict()
            realCrimes = realCrimes.to_dict()
            # Keyword arguments shared by all three forecast functions.
            common = dict(
                clusters=clusters,
                realCrimes=realCrimes,
                periodsAhead_list=periodsAhead_list,
                gridshape=gridshape,
                ignoreFirst=ignoreFirst,
                threshold=threshold,
                maxDist=maxDist,
                isModelEvaluation=isModelEvaluation)
            for method in methods:
                # The forecast return values were never used by this view,
                # so they are no longer bound to a local.
                if method == "LSTM":
                    forecast_LSTM(isRetraining=isRetrainingModel, **common)
                elif method in ("ARIMA", "AR"):
                    forecast_ARIMA(method=method,
                                   isRetraining=isRetrainingModel,
                                   **common)
                else:
                    forecast_MM(method=method, **common)
                # NOTE(review): lon_min/lon_max/lat_min/lat_max are not
                # defined in this function; presumably module-level
                # constants -- confirm.
                compute_resource_allocation(resource_indexes, 1, [gridshape],
                                            periodsAhead_list, ignoreFirst,
                                            [threshold], 1, [method], lon_min,
                                            lon_max, lat_min, lat_max)
    fig, ax = plt.subplots(1, len(methods), figsize=(18, 5), sharey=True)
    try:
        if len(methods) == 1:
            # With a single subplot, ``ax`` is not a sequence; wrap it so the
            # plotting helper always receives a list.
            ax = [ax]
        plot_resource_allocation(ax, gridshapes, periodsAhead_list[0],
                                 thresholds, methods, ignoreFirst)
        # makedirs creates the intermediate "results" directory as well.
        os.makedirs(os.path.abspath("results/plots"), exist_ok=True)
        image_path = os.path.abspath('results/plots/{}-week-ahead.png'.format(
            periodsAhead_list[0]))
        plt.savefig(image_path, dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each request leaks a figure in a long-running server process.
        plt.close(fig)
    with open(image_path, "rb") as imageFile:
        image_data = base64.b64encode(imageFile.read())
    response = HttpResponse(pd.io.json.dumps(image_data))
    response['Access-Control-Allow-Origin'] = '*'
    response.status_code = 200
    return response