Esempio n. 1
0
def predict():
    """
    Method: POST
    Assign a topic id to every document in the request's JSON body.

    The body must be a JSON object carrying a non-empty list of strings
    under the key ``docs``; every violation is answered with status 400
    and a short message.

    :return: ``({'topics': [...]}, 200)`` on success, ``(message, 400)``
        when validation fails
    """
    payload = request.json

    # Guard clauses: reject malformed input as early as possible.
    if not isinstance(payload, dict):
        return 'Not json format', 400
    if "docs" not in payload:
        return 'field `docs` not present', 400

    documents = payload['docs']
    if not (isinstance(documents, list) and documents):
        return '`docs` should be list and not empty', 400
    if any(not isinstance(item, str) for item in documents):
        return 'Wrong data type of doc', 400

    # `kmeans` is the module-level model; its result is converted with
    # .tolist() so the response body holds plain Python values.
    return {'topics': kmeans.predict(documents).tolist()}, 200
Esempio n. 2
0
def process(selectedCrime, type):
    """Cluster crime coordinates for every period and dump the result as JS.

    For each year in 2005-2016, each month 1-12 and each cluster count 2-10,
    ``get_clusters`` is called once with ``type='geodesic'`` and once with
    ``type='euclidean'``.  Every coordinate returned by the geodesic call is
    then assigned to a cluster under both metrics via ``predict``.  The
    nested result ``finalData[year][month][num_clusters]`` holds:

    * ``'points'`` - a GeoJSON-style FeatureCollection with one Point per
      coordinate, coloured by its euclidean cluster;
    * ``'<i>'`` - the value of ``build_cluster_data`` for geodesic cluster
      ``i`` (omitted when that call fails twice).

    :param selectedCrime: forwarded to ``get_clusters`` and
        ``build_cluster_data``; also used as the output sub-directory name.
    :param type: prefix of the output file name.  It shadows the builtin but
        is kept so existing keyword callers continue to work.
    :return: None.  Writes
        ``../static/<selectedCrime>/<type>_final_points.js``.
    """
    colors = [
        'red', 'orange', 'yellow', 'green', 'blue', 'purple', 'black', 'white',
        'cyan', 'brown'
    ]

    finalData = {}
    for year in range(2005, 2017):
        print(year)
        year_data = finalData[str(year)] = {}
        for month in range(1, 13):
            print('    ' + str(month))
            month_data = year_data[str(month)] = {}

            for num_clusters in range(2, 11):
                print('        ' + str(num_clusters))
                points = {"type": "FeatureCollection", "features": []}

                entry = month_data[str(num_clusters)] = {}
                # One bucket of coordinates per geodesic cluster index.
                clusterData = [[] for _ in range(num_clusters)]

                # Only the cluster model and the coordinates are needed from
                # the geodesic run, and only the model from the euclidean one.
                geodesic_clusters, coordinate_array, _ = get_clusters(
                    selectedCrime=selectedCrime,
                    type='geodesic',
                    month=month,
                    year=year,
                    num_clusters=num_clusters)
                euclidean_clusters, _, _ = get_clusters(
                    selectedCrime=selectedCrime,
                    type='euclidean',
                    month=month,
                    year=year,
                    num_clusters=num_clusters)

                for coor in coordinate_array:
                    first = float(coor[0])
                    second = float(coor[1])
                    c_geodesic = predict(
                        num_clusters,
                        geodesic_clusters,
                        [first, second],
                        type='geodesic')
                    c_euclidean = predict(
                        num_clusters,
                        euclidean_clusters,
                        [first, second],
                        type='euclidean')
                    # The stored order is swapped relative to the input
                    # (presumably input is lat/lon and GeoJSON wants lon/lat
                    # - TODO confirm against get_clusters).
                    clusterData[int(c_geodesic)].append([second, first])
                    points["features"].append({
                        "geometry": {
                            "type": "Point",
                            "coordinates": [second, first]
                        },
                        "type": "Feature",
                        "properties": {
                            "fillColor": colors[c_euclidean],
                            "euclidean_cluster": c_euclidean,
                            "geodesic_cluster": c_geodesic,
                            "popupContent": ""
                        },
                    })

                entry['points'] = points

                for i in range(num_clusters):
                    # build_cluster_data gets two attempts, as before; the
                    # cluster is skipped if both fail.  Catching Exception
                    # rather than using a bare except lets Ctrl-C and
                    # SystemExit stop this long-running job.
                    for _attempt in range(2):
                        try:
                            entry[str(i)] = build_cluster_data(
                                clusterData[i],
                                selected_crime=selectedCrime,
                                cluster=i)
                        except Exception:
                            continue
                        break
                    else:
                        print('continuing')

    # Serialise before touching the file so a failure in json.dumps cannot
    # leave a truncated output behind; the context manager guarantees the
    # handle is closed even if a write raises.
    payload = json.dumps(finalData)
    # NOTE(review): the JS variable is always named `geodesic_data`, whatever
    # `type` is - confirm that consumers expect this.
    with open('../static/' + selectedCrime + '/' + type + '_final_points.js',
              'w') as fn:
        fn.write('var geodesic_data = ')
        fn.write(payload)
        fn.write(';\n')
Esempio n. 3
0
from sklearn.datasets import load_digits
from sklearn.metrics import fowlkes_mallows_score
from sklearn.cluster import AgglomerativeClustering, AffinityPropagation
from kmeans import kmeans, Point, predict

# Digits dataset as a (samples, labels) pair.
data, target = load_digits(return_X_y=True)

# K-Means
# Every sample is wrapped in the project's Point type before fitting
# (10 is presumably the number of clusters, one per digit).
kmeans_data = [Point(sample) for sample in data]
k_means = kmeans(kmeans_data, 10)

# Cluster assigned by the fitted model to each sample, in dataset order.
labels = [predict(k_means, Point(sample)) for sample in data]
target = [int(label) for label in target]

# results[t][c] counts the samples with true label t that landed in cluster c.
results = [[0] * 10 for _ in range(10)]
for truth, assigned in zip(target, labels):
    results[truth][assigned] += 1

# For each true label, remember the cluster holding most of its samples.
# max() returns the first maximal index, i.e. ties resolve to the lowest
# cluster index.
conversion = {
    truth: max(range(len(counts)), key=counts.__getitem__)
    for truth, counts in enumerate(results)
}
Esempio n. 4
0
# Script fragment (Python 2: note the print statements).  It transforms the
# test data with `transferMatrix`, clusters the kernelised source features
# with `kmeansAlgorithm`, reports per-cluster and average purity, and finally
# calls `predict` on the kernelised target features.
# `tranSourceFeature`, `tranTargetFeature`, `transferMatrix`, `targetPath`,
# `data` and `sourceLabel` are defined outside this fragment.
print "source data's length: ", len(tranSourceFeature)

# Experiment 1
testData, testLabel = readTestData(targetPath)
print "target outlier length: ", len(testLabel)
# Keep a reference to the data as read; `testData` is rebound to a transposed
# array two lines below.
ori_testData = testData
# K is passed to kmeansAlgorithm, calcPurity and predict (presumably the
# number of clusters - confirm against those helpers).
K = 5
# outlierTest = (transferMatrix^T . testData^T)^T
transferMatrix = np.array(transferMatrix).T
testData = np.array(testData).T
outlierTest = dot(transferMatrix, testData)
outlierTest = outlierTest.T
# NOTE(review): the return value is discarded; if `outlierTest` is a NumPy
# array this call does not modify it, so the line has no effect.
outlierTest.tolist()

# sigma is the second argument to `kernel` (presumably the kernel width -
# TODO confirm).
sigma = 0.0005
kernelTranSourceFeature = kernel(tranSourceFeature, sigma)
centroids, clusterAssment, radiusCluster = kmeansAlgorithm(
    mat(kernelTranSourceFeature), K)
# Only referenced by the commented-out code below within this fragment.
numSamples = len(data)
# clusterAssment = mat(zeros((numSamples, 1)))
# for i in xrange(numSamples):
#     clusterAssment[i, :] = predict_label[i]
purity = calcPurity(clusterAssment, K, sourceLabel)
print "purity", purity

# Accumulate from a float so the average below is a true division under
# Python 2 even if the purity values are integers.
sumPurity = 0.0
for i in purity:
    sumPurity += i

print "avg purity", sumPurity / len(purity)
# The result of predict is not captured here.
predict(mat(kernel(tranTargetFeature, sigma)), centroids, K, radiusCluster)
Esempio n. 5
0
# Dataset with pulses, E and PVI; its directory comes from the environment.
prefix = os.environ["INFUSION_DATASETS"]
data = torch.load(f'{prefix}/pulses_E_PVI.pt')
# Saved clustering state
km = torch.load("_kmeans/km.pt")
# Saved weights for the already-constructed `model`
st = torch.load('st/out64/mon3')
model.load_state_dict(st)
model.eval()  # same effect as model.train(False)

#--- Distribution of E within cluster ---

# Reshape the pulses into rows of 128 values, run them through the model and
# assign each output to a cluster.
pulses = data["pulses"]
x = pulses.view(-1, 128)
y = model(x)
c = kmeans.predict(km, y)

# Repeat every E value 256 times along a new second axis, then flatten.
# (Presumably 256 rows of `x` belong to each E value - TODO confirm.)
energy = data["E"]
E = energy.unsqueeze(1).repeat(1, 256).view(-1)

# One row per entry: its E value and its cluster id.
columns = {"E": E.numpy(), "C": c.numpy()}
df = pd.DataFrame(columns)
# One column per cluster id holding the E values assigned to it.
pivot = df.pivot(columns="C", values="E")


def seriesE(i):
    """Return the non-null entries of column ``i`` of the module-level ``pivot``."""
    column = pivot[i]
    return column[column.notnull()]


ax = sns.catplot(y="E",
                 col="C",
                 col_wrap=8,
                 kind="violin",