/
similarity_population.py
105 lines (89 loc) · 3.45 KB
/
similarity_population.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/python
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import pandas as pd
from pandas import *
import csv
import sys
import locale
from sklearn import preprocessing
import similarity_measure as sm
from math import*
def to_float(a):
    """Convert every element of the mutable sequence ``a`` to ``float`` in place.

    Args:
        a: A mutable sequence (e.g. a list) whose elements are accepted by
            ``float()``.

    Returns:
        The same object ``a``, now holding floats.

    Raises:
        ValueError, TypeError: If an element cannot be converted. In that case
            ``a`` is left unmodified.
    """
    # Build the converted values first, then write them back in one slice
    # assignment: the caller's object is updated in place, and a failed
    # conversion cannot leave it half-converted.
    a[:] = [float(value) for value in a]
    return a
def get_table(label, table=None):
    """Return one column of the module-level ``all_sub_features`` frame as a list.

    Args:
        label: Column name to look up in ``all_sub_features``.
        table: Ignored. It is kept (now optional) only so that existing
            two-argument calls keep working; the value passed in is never read.

    Returns:
        list: The column's values, converted with ``.values.tolist()``.

    Raises:
        KeyError: If ``label`` is not a column of ``all_sub_features``.
    """
    return all_sub_features[label].values.tolist()
def similarity_sub(a, b, num):
    """Return the similarity of ``a`` and ``b`` derived from their distance.

    The distance is computed by ``sm.bhatta_distance(a, b, num)`` and then
    mapped to a similarity by ``sm.dis_to_sim``.

    Args:
        a: First feature sequence.
        b: Second feature sequence.
        num: Forwarded unchanged as the third argument of
            ``sm.bhatta_distance``.
            NOTE(review): presumably the number of values/bins — confirm
            against ``similarity_measure``.

    Returns:
        Whatever ``sm.dis_to_sim`` returns for the computed distance.
    """
    distance = sm.bhatta_distance(a, b, num)
    return sm.dis_to_sim(distance)
# Load the feature table; every column used below is read from this frame.
all_sub_features = pd.read_csv('all_sub_all_category.csv')

# Disabled command-line selection of two rows (arguments are 1-based):
# sub_a = int(sys.argv[1]) - 1
# sub_b = int(sys.argv[2]) - 1


def _load_column(label):
    """Return column ``label`` of ``all_sub_features`` as a list of floats."""
    # get_table never reads its second argument, so an empty placeholder list
    # is passed purely to satisfy its signature.
    return to_float(get_table(label, []))


# Age-band columns (percentages, going by the '%' in the column headers).
age_0_4 = _load_column('2012 ERP age 0-4, %')
age_5_9 = _load_column('2012 ERP age 5-9, %')
age_10_14 = _load_column('2012 ERP age 10-14, %')
age_15_19 = _load_column('2012 ERP age 15-19, %')
age_20_24 = _load_column('2012 ERP age 20-24, %')
age_25_44 = _load_column('2012 ERP age 25-44, %')
age_45_64 = _load_column('2012 ERP age 45-64, %')
age_65_69 = _load_column('2012 ERP age 65-69, %')
age_70_74 = _load_column('2012 ERP age 70-74, %')
age_75_79 = _load_column('2012 ERP age 75-79, %')
age_80_84 = _load_column('2012 ERP age 80-84, %')
age_85 = _load_column('2012 ERP age 85+, %')

# Health, aged-care and education facility columns.
pharmacies = _load_column('Pharmacies')
hospitaltime = _load_column('Travel time to nearest public hospital')
hospitaldist = _load_column('Distance to nearest public hospital')
aged_Care_High_Care = _load_column('Aged Care (High Care)')
aged_Care_Low_Care = _load_column('Aged Care (Low Care)')
kinder_Childcare = _load_column('Kinder and/or Childcare')
primary_Schools = _load_column('Primary Schools')
secondary_Schools = _load_column('Secondary Schools')
# Row identifiers, taken from the 'ID' column of the feature table.
sub_id = all_sub_features['ID'].values.tolist()

# Columns grouped in the exact order they appear inside each feature row.
_age_columns = (
    age_0_4, age_5_9, age_10_14, age_15_19, age_20_24, age_25_44,
    age_45_64, age_65_69, age_70_74, age_75_79, age_80_84, age_85,
)
_facility_columns = (
    pharmacies, hospitaltime, hospitaldist, aged_Care_Low_Care,
    aged_Care_High_Care, kinder_Childcare, primary_Schools, secondary_Schools,
)

# Each row is [ID, value, value, ...]; id_features holds the age values and
# id_features_2 the facility values for the same row index.
id_features = []
id_features_2 = []
# NOTE(review): the row count 34 is hard-coded; presumably it is the number of
# rows in the CSV — confirm. Fewer rows raise IndexError, extra rows are ignored.
for i in range(34):
    row_id = sub_id[i]
    id_features.append([row_id] + [column[i] for column in _age_columns])
    id_features_2.append([row_id] + [column[i] for column in _facility_columns])
# print (similarity_sub(id_features[sub_a][1:], id_features[sub_b][1:], 12))
matrix_value = []
# Generate and return the pairwise distance scores.
def distance_metrix():
    """Return the square matrix of pairwise distances between all feature rows.

    Entry ``[i][j]`` is the mean of two distances between rows ``i`` and ``j``:
    ``sm.bhatta_distance`` over the ``id_features`` values and
    ``sm.euclidean_distance`` over the ``id_features_2`` values. The leading
    ID element of every row is dropped before the rows are compared.

    Returns:
        list: One inner list per row of ``id_features``, each with one score
        per row.

    Raises:
        IndexError: If ``id_features_2`` has fewer rows than ``id_features``.
    """
    # Size the matrix from the loaded data instead of repeating a fixed row
    # count, so this function stays in step with however many rows were loaded.
    row_count = len(id_features)
    scores = []
    for i in range(row_count):
        row_scores = []
        for j in range(row_count):
            # Fresh slices are taken for every call, so the sm helpers never
            # receive a list that is shared with another call.
            # 12 equals the number of values left in an id_features row once
            # the ID is dropped; how bhatta_distance uses it is not visible here.
            dist1 = sm.bhatta_distance(id_features[i][1:], id_features[j][1:], 12)
            dist2 = sm.euclidean_distance(id_features_2[i][1:], id_features_2[j][1:])
            row_scores.append((dist1 + dist2) / 2)
        scores.append(row_scores)
    return scores
# print(sm.bhatta_distance(id_features[2][1:],id_features[2][1:],12))