def test7(): #fetch list of countries and a dictionary of countries mapped with rows from csv for group1 country, country_result = util.country_group2() result=util.group2() labels = util.cluster_user_data(result) x1=[] len_cluster=3 j1=-1 country1=[] for j in range(len(country)): country_item=country[j] if len(country_result[country_item]) > 10: country1.append(country[j]) result=util.group2() labels = util.cluster_user_data(country_result[country_item]) j1=j1+1 result1=util.split_user_data(country_result[country_item], labels) for i in range(3): #print(i) #print(j) x1.append([]) x1[j1].append(result1[i]) len_country1=len(country1) #print(len_country) #print(country) return render_template('index1.html', labels_html=labels, column_html=column_names, data_html=result1[0], x1=x1, len_country=len_country1, country=country1)
def test6(): country, country_result = util.country_group1() for country_item in country: if len(country_result[country_item]) > 10: result = util.group1() labels = util.cluster_user_data(result) result1 = util.split_user_data(country_result[country_item], labels) return result1 else: result = util.group1() labels = util.cluster_user_data(result) return result
def index(): # list of group numbers groups = [1, 2, 3, 4] # list to hold group data user_data = [] # list of all countries countries = ['USA', 'Canada', 'UK', 'Romania', 'Switzerland', 'Rwanda', 'Hong Kong', 'France', 'C yprus', \ 'Israel', 'Portugal', 'Ireland I', 'Germany', 'Australia', 'China', 'New Zealand', 'Palestine'] # loop to query data for each group for x in groups: # loop split data by country for country in countries: data = util.query('WebAppsDatabase.db', x, country) # loop to split data if there are more than 10 elements if len(data) >= 10: labels = util.cluster_user_data(data) data = util.split_user_data(data, labels) # adds data to user_data list user_data.append(data) # loop to see if an element is a list of split data # Only used to see which countries are at which index and have split data for data in user_data: if len(data) != 0: print(data[0][1]) print(user_data.index(data)) print("\n") if isinstance(data[0], list): country = util.get_country(data[0][0]) print("***SPLIT***") print(country) print(user_data.index(data)) print("\n") return render_template('index.html', column_html=column_names, data1usa1_html=user_data[0][0], data1usa2_html=user_data[0][1], data1usa3_html=user_data[0][2], data1canada_html=user_data[1], data1uk_html=user_data[2], data1romania1_html=user_data[3][0], data1romania2_html=user_data[3][1], data1romania3_html=user_data[3][2], data1switz_html=user_data[4], data1rwanda_html=user_data[5], data1isreal_html=user_data[9], data1germany_html=user_data[12], data2usa1_html=user_data[17][0], data2usa2_html=user_data[17][1], data2usa3_html=user_data[17][2], data2canada_html=user_data[18], data2uk_html=user_data[19], data2romania_html=user_data[20], data2switz_html=user_data[21], data2rwanda_html=user_data[22], data2hongkong_html=user_data[23], data2france_html=user_data[24], data2germany_html=user_data[29], data2nz_html=user_data[32], data2pal_html=user_data[33], data3usa1_html=user_data[34][0], data3usa2_html=user_data[34][1], data3usa3_html=user_data[34][2], data3canada_html=user_data[35], data3uk_html=user_data[36], data3romania1_html=user_data[37][0], data3romania2_html=user_data[37][1], data3romania3_html=user_data[37][2], data3switz1_html=user_data[38][0], data3switz2_html=user_data[38][1], data3switz3_html=user_data[38][2], data3rwanda_html=user_data[39], data3portugal_html=user_data[44], data3ireland_html=grp3ireland, data3germany_html=user_data[46], data3australia_html=user_data[47], data3china_html=user_data[48], data4usa1_html=user_data[51][0], data4usa2_html=user_data[51][1], data4usa3_html=user_data[51][2], data4canada1_html=user_data[52][0], data4canada2_html=user_data[52][1], data4canada3_html=user_data[52][2], data4uk_html=user_data[53], data4romania1_html=user_data[54][0], data4romania2_html=user_data[54][1], data4romania3_html=user_data[54][2], data4switz_html=user_data[55], data4portugal_html=user_data[61], data4germany_html=user_data[63], data4australia_html=user_data[64])
], [ 9, "USA", 39.0, "Male", 2, 2, 4, 1, 2, "Anger", "Impacts on day-to-day life", "Exercising", "Software Engineer" ], [ 10, "USA", 78.0, "Male", 5, 5, 5, 1, 1, "Anger", "National leadership incompetence", "Family", "Executive - retired" ] ] } labels = util.cluster_user_data(sample_data['user_data']) print("predicted labels are: ", labels) split_result = util.split_user_data(sample_data['user_data'], labels) print("Split original user data to:") print(split_result) print("Split_result length is: ", len(split_result)) # def cluster_user_data(input_data, emotional_col_start=4, emotional_col_end=9, n_clusters=3): # ''' # This function cluster user data based on KMeans algorithm # By default, it will split your data into three groups # ''' # # collect answers for five emotional questions # # which are located from 4th col to 9th col # emotional_data = [i[emotional_col_start:emotional_col_end] for i in input_data['user_data']] # # use kmeans to cluster data # kmeans = KMeans(n_clusters).fit(emotional_data) # # return cluster labels
def index(): list_of_tables = [ { "title": "All User Data" }, { "title": "Young Male Data" }, { "title": "Middle-Aged or Old Male Data" }, { "title": "Young Female Data" }, { "title": "Middle-Aged or Old Female Data" }, { "title": "Young Male Data USA" }, { "title": "Young Male Data Canada" }, { "title": "Middle-Aged or Old Male Data USA" }, { "title": "Middle-Aged or Old Male Data Canada" }, { "title": "Young Female Data USA" }, { "title": "Young Female Data Canada" }, { "title": "Middle-Aged or Old Female Data USA" }, { "title": "Middle-Aged or Old Female Data Canada" }, ] #Calling function, argument is path of folder where all CSV files are stored create_table(UPLOAD_FOLDER) table_df = pd.read_sql_table('we_are_not_alone_no_nan', con=engine) full_data = table_df.values list_of_tables[0]["data"] = full_data young_male_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Male\"" young_male_df = pd.read_sql(young_male_query, con=engine) young_male_data = young_male_df.values labels = util.cluster_user_data(full_data) list_of_tables[1]["data"] = young_male_data middle_aged_or_old_male_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Male\"" middle_aged_or_old_male_df = pd.read_sql(middle_aged_or_old_male_query, con=engine) middle_aged_or_old_male_data = middle_aged_or_old_male_df.values list_of_tables[2]["data"] = middle_aged_or_old_male_data young_female_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Female\"" young_female_df = pd.read_sql(young_female_query, con=engine) young_female_data = young_female_df.values list_of_tables[3]["data"] = young_female_data middle_aged_or_old_female_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Female\"" middle_aged_or_old_female_df = pd.read_sql(middle_aged_or_old_female_query, con=engine) middle_aged_or_old_female_data = middle_aged_or_old_female_df.values list_of_tables[4]["data"] = middle_aged_or_old_female_data young_male_usa_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Male\" AND \"What country do you live in?\" = \"USA\"" young_male_usa_df = pd.read_sql(young_male_usa_query, con=engine) young_male_usa_data = young_male_usa_df.values list_of_tables[5]["data"] = young_male_usa_data young_male_can_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Male\" AND \"What country do you live in?\" = \"Canada\"" young_male_can_df = pd.read_sql(young_male_can_query, con=engine) young_male_can_data = young_male_can_df.values list_of_tables[6]["data"] = young_male_can_data middle_aged_or_old_male_usa_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Male\" AND \"What country do you live in?\" = \"USA\"" middle_aged_or_old_male_usa_df = pd.read_sql( middle_aged_or_old_male_usa_query, con=engine) middle_aged_or_old_male_usa_data = middle_aged_or_old_male_usa_df.values list_of_tables[7]["data"] = middle_aged_or_old_male_usa_data middle_aged_or_old_male_can_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Male\" AND \"What country do you live in?\" = \"Canada\"" middle_aged_or_old_male_can_df = pd.read_sql( middle_aged_or_old_male_can_query, con=engine) middle_aged_or_old_male_can_data = middle_aged_or_old_male_can_df.values list_of_tables[8]["data"] = middle_aged_or_old_male_can_data young_female_usa_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Female\" AND \"What country do you live in?\" = \"USA\"" young_female_usa_df = pd.read_sql(young_female_usa_query, con=engine) young_female_usa_data = young_female_usa_df.values list_of_tables[9]["data"] = young_female_usa_data young_female_can_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Female\" AND \"What country do you live in?\" = \"Canada\"" young_female_can_df = pd.read_sql(young_female_can_query, con=engine) young_female_can_data = young_female_can_df.values list_of_tables[10]["data"] = young_female_can_data middle_aged_or_old_female_usa_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Female\" AND \"What country do you live in?\" = \"USA\"" middle_aged_or_old_female_usa_df = pd.read_sql( middle_aged_or_old_female_usa_query, con=engine) middle_aged_or_old_female_usa_data = middle_aged_or_old_female_usa_df.values list_of_tables[11]["data"] = middle_aged_or_old_female_usa_data middle_aged_or_old_female_can_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Female\" AND \"What country do you live in?\" = \"Canada\"" middle_aged_or_old_female_can_df = pd.read_sql( middle_aged_or_old_female_can_query, con=engine) middle_aged_or_old_female_can_data = middle_aged_or_old_female_can_df.values list_of_tables[12]["data"] = middle_aged_or_old_female_can_data for item in list_of_tables[5:]: if len(item["data"]) > 10: labels = util.cluster_user_data(item["data"]) groups = util.split_user_data(item["data"], labels) for i, table in enumerate(groups): dict = { "title": "%s KMeans Split #%d" % (item["title"], i + 1), "data": [] } dict["data"] = table list_of_tables += [dict] return render_template('index.html', labels_html=labels, column_html=column_names, data_html=list_of_tables)
def index(): conn = connect() cur = conn.cursor() COUNTRIES = "SELECT DISTINCT country FROM not_alone;" cur.execute(COUNTRIES) countries = cur.fetchall() subGroups = [] #Split data into 4 groups based on age and gender #group 1 GROUP_1 = "CREATE TABLE group_1 AS (SELECT * FROM not_alone WHERE age <= 35 AND gender = 'Male');" cur.execute(GROUP_1) #group 2 GROUP_2 = "CREATE TABLE group_2 AS (SELECT * FROM not_alone WHERE age >= 36 AND gender = 'Male');" cur.execute(GROUP_2) #group 3 GROUP_3 = "CREATE TABLE group_3 AS (SELECT * FROM not_alone WHERE age <= 35 AND gender = 'Female');" cur.execute(GROUP_3) #group 4 GROUP_4 = "CREATE TABLE group_4 AS (SELECT * FROM not_alone WHERE age >= 36 AND gender = 'Female');" cur.execute(GROUP_4) #split each group into smaller groups based on country and size for x in range(1,5): for row in countries: #drop TEMP table from pervious loop cycle cur.execute("DROP TABLE IF EXISTS TEMP;") #create table to store current subgroup in group = "GROUP_" + str(x) country = row[0] TEMP = "CREATE TABLE TEMP AS (SELECT * FROM " + group + " WHERE country = '" + country + "');" TEMPROWS = "SELECT * FROM TEMP;" cur.execute(TEMP) #get number of rows in table, to make sure subgroup is not empty cur.execute("SELECT COUNT(*) FROM TEMP;") rowCount = cur.fetchone()[0] #check if subgroup is empty if rowCount > 0: #if current country has more than 10 entries, split using kmeans cur.execute(TEMPROWS) subGroup = cur.fetchall() if rowCount >= 10: labels = util.cluster_user_data(subGroup) splitGroup = util.split_user_data(subGroup, labels) for y in splitGroup: subGroups.append(y) else: subGroups.append(subGroup) #pass groups to index.html return render_template('index.html', subGroups = subGroups, countries = countries, column_html = column_names)
def query_survey_results4(country_name=''): country, country_result = util.country_group3() result = util.group3() labels = util.cluster_user_data(result) x1 = [] len_cluster = 3 j1 = -1 country1 = [] for j in range(len(country)): country_item = country[j] if len(country_result[country_item]) > 10: country1.append(country[j]) result = util.group3() labels = util.cluster_user_data(country_result[country_item]) j1 = j1 + 1 result1 = util.split_user_data(country_result[country_item], labels) for i in range(3): #print(i) #print(j) x1.append([]) x1[j1].append(result1[i]) # print(j1) # print(i) # print(x1[j1][i]) group3 = x1 #print(group3) country, country_result = util.country_group1() #print(country_result) result = util.group1() labels = util.cluster_user_data(result) x1 = [] len_cluster = 3 j1 = -1 country1 = [] for j in range(len(country)): country_item = country[j] if len(country_result[country_item]) > 10: country1.append(country[j]) result = util.group1() labels = util.cluster_user_data(country_result[country_item]) j1 = j1 + 1 result1 = util.split_user_data(country_result[country_item], labels) for i in range(3): #print(i) #print(j) x1.append([]) x1[j1].append(result1[i]) group1 = x1 #print(group1) country, country_result = util.country_group2() result = util.group2() labels = util.cluster_user_data(result) x1 = [] len_cluster = 3 j1 = -1 country1 = [] for j in range(len(country)): country_item = country[j] if len(country_result[country_item]) > 10: country1.append(country[j]) result = util.group2() labels = util.cluster_user_data(country_result[country_item]) j1 = j1 + 1 result1 = util.split_user_data(country_result[country_item], labels) for i in range(3): #print(i) #print(j) x1.append([]) x1[j1].append(result1[i]) group2 = x1 #print(group2) country, country_result = util.country_group4() result = util.group4() labels = util.cluster_user_data(result) x1 = [] len_cluster = 3 j1 = -1 country1 = [] for j in range(len(country)): country_item = country[j] if len(country_result[country_item]) > 10: country1.append(country[j]) result = util.group4() labels = util.cluster_user_data(country_result[country_item]) j1 = j1 + 1 result1 = util.split_user_data(country_result[country_item], labels) for i in range(3): #print(i) #print(j) x1.append([]) x1[j1].append(result1[i]) # print(j1) # print(i) # print(x1[j1][i]) group4 = x1 #print(group4) dictionary_usa = {'country': ['Unites States Of America']} dictionary_romania = {'country': ['Romania']} dictionary_switzerland = {'country': ['Switzerland']} for i in range(2): for j in range(3): if (group1[i][j][1][1] == 'USA'): dictionary_usa['group1' + 'cluster' + str(j)] = [group1[i][j]] else: dictionary_romania['group1' + 'cluster' + str(j)] = [group1[i][j]] for i in range(1): for j in range(3): if (group2[i][j][1][1] == 'USA'): dictionary_usa['group2' + 'cluster' + str(j)] = [group2[i][j]] else: dictionary_romania['group2' + 'cluster' + str(j)] = [group2[i][j]] for i in range(3): for j in range(3): if (group3[i][j][1][1] == 'USA'): dictionary_usa['group3' + 'cluster' + str(j)] = [group3[i][j]] else: if (group3[i][j][1][1] == 'Switzerland'): dictionary_switzerland['group3' + 'cluster' + str(j)] = [group3[i][j]] else: dictionary_romania['group3' + 'cluster' + str(j)] = [group3[i][j]] for i in range(2): for j in range(3): if (group4[i][j][1][1] == 'USA'): dictionary_usa['group4' + 'cluster' + str(j)] = [group4[i][j]] else: dictionary_romania['group4' + 'cluster' + str(j)] = [group4[i][j]] dictionary_usa = str(dictionary_usa) dictionary_usa = dictionary_usa.replace('(', '[') dictionary_usa = dictionary_usa.replace(')', ']') dictionary_usa = eval(dictionary_usa) dictionary_romania = str(dictionary_romania) dictionary_romania = dictionary_romania.replace('(', '[') dictionary_romania = dictionary_romania.replace(')', ']') dictionary_romania = eval(dictionary_romania) dictionary_switzerland = str(dictionary_switzerland) dictionary_switzerland = dictionary_switzerland.replace('(', '[') dictionary_switzerland = dictionary_switzerland.replace(')', ']') dictionary_switzerland = eval(dictionary_switzerland) #print(group1[0][0]) return json.dumps(dictionary_romania)