Esempio n. 1
0
def test7():
	#fetch list of countries and a dictionary of countries mapped with rows from csv for group1
	country, country_result = util.country_group2()
	result=util.group2()
	labels = util.cluster_user_data(result)
	x1=[]

	len_cluster=3
	j1=-1
	country1=[]
	for j  in range(len(country)):
		country_item=country[j]

		if len(country_result[country_item]) > 10:
			country1.append(country[j])
			result=util.group2()
			labels = util.cluster_user_data(country_result[country_item])
			j1=j1+1
			result1=util.split_user_data(country_result[country_item], labels)
			for i in range(3):
				#print(i)
				#print(j)
				x1.append([])
				x1[j1].append(result1[i])



	len_country1=len(country1)
	#print(len_country)
	#print(country)
	return render_template('index1.html', labels_html=labels, column_html=column_names, data_html=result1[0], x1=x1, len_country=len_country1, country=country1)
Esempio n. 2
0
def test6():
    country, country_result = util.country_group1()
    for country_item in country:
        if len(country_result[country_item]) > 10:
            result = util.group1()
            labels = util.cluster_user_data(result)
            result1 = util.split_user_data(country_result[country_item],
                                           labels)
            return result1
        else:
            result = util.group1()
            labels = util.cluster_user_data(result)
            return result
Esempio n. 3
0
def index():

    # list of group numbers
    groups = [1, 2, 3, 4]

    # list to hold group data
    user_data = []

    # list of all countries
    countries = ['USA', 'Canada', 'UK', 'Romania', 'Switzerland', 'Rwanda', 'Hong Kong', 'France', 'C yprus', \
                 'Israel', 'Portugal', 'Ireland I', 'Germany', 'Australia', 'China', 'New Zealand', 'Palestine']

    # loop to query data for each group
    for x in groups:

        # loop split data by country
        for country in countries:
            data = util.query('WebAppsDatabase.db', x, country)

            # loop to split data if there are more than 10 elements
            if len(data) >= 10:
                labels = util.cluster_user_data(data)
                data = util.split_user_data(data, labels)

            # adds data to user_data list
            user_data.append(data)

    # loop to see if an element is a list of split data
    # Only used to see which countries are at which index and have split data
    for data in user_data:
        if len(data) != 0:
            print(data[0][1])
            print(user_data.index(data))
            print("\n")
            if isinstance(data[0], list):
                country = util.get_country(data[0][0])
                print("***SPLIT***")
                print(country)
                print(user_data.index(data))
                print("\n")

    return render_template('index.html',
                           column_html=column_names,
                           data1usa1_html=user_data[0][0],
                           data1usa2_html=user_data[0][1],
                           data1usa3_html=user_data[0][2],
                           data1canada_html=user_data[1],
                           data1uk_html=user_data[2],
                           data1romania1_html=user_data[3][0],
                           data1romania2_html=user_data[3][1],
                           data1romania3_html=user_data[3][2],
                           data1switz_html=user_data[4],
                           data1rwanda_html=user_data[5],
                           data1isreal_html=user_data[9],
                           data1germany_html=user_data[12],
                           data2usa1_html=user_data[17][0],
                           data2usa2_html=user_data[17][1],
                           data2usa3_html=user_data[17][2],
                           data2canada_html=user_data[18],
                           data2uk_html=user_data[19],
                           data2romania_html=user_data[20],
                           data2switz_html=user_data[21],
                           data2rwanda_html=user_data[22],
                           data2hongkong_html=user_data[23],
                           data2france_html=user_data[24],
                           data2germany_html=user_data[29],
                           data2nz_html=user_data[32],
                           data2pal_html=user_data[33],
                           data3usa1_html=user_data[34][0],
                           data3usa2_html=user_data[34][1],
                           data3usa3_html=user_data[34][2],
                           data3canada_html=user_data[35],
                           data3uk_html=user_data[36],
                           data3romania1_html=user_data[37][0],
                           data3romania2_html=user_data[37][1],
                           data3romania3_html=user_data[37][2],
                           data3switz1_html=user_data[38][0],
                           data3switz2_html=user_data[38][1],
                           data3switz3_html=user_data[38][2],
                           data3rwanda_html=user_data[39],
                           data3portugal_html=user_data[44],
                           data3ireland_html=grp3ireland,
                           data3germany_html=user_data[46],
                           data3australia_html=user_data[47],
                           data3china_html=user_data[48],
                           data4usa1_html=user_data[51][0],
                           data4usa2_html=user_data[51][1],
                           data4usa3_html=user_data[51][2],
                           data4canada1_html=user_data[52][0],
                           data4canada2_html=user_data[52][1],
                           data4canada3_html=user_data[52][2],
                           data4uk_html=user_data[53],
                           data4romania1_html=user_data[54][0],
                           data4romania2_html=user_data[54][1],
                           data4romania3_html=user_data[54][2],
                           data4switz_html=user_data[55],
                           data4portugal_html=user_data[61],
                           data4germany_html=user_data[63],
                           data4australia_html=user_data[64])
Esempio n. 4
0
        ],
        [
            9, "USA", 39.0, "Male", 2, 2, 4, 1, 2, "Anger",
            "Impacts on day-to-day life", "Exercising", "Software Engineer"
        ],
        [
            10, "USA", 78.0, "Male", 5, 5, 5, 1, 1, "Anger",
            "National leadership incompetence", "Family", "Executive - retired"
        ]
    ]
}

labels = util.cluster_user_data(sample_data['user_data'])
print("predicted labels are: ", labels)

split_result = util.split_user_data(sample_data['user_data'], labels)
print("Split original user data to:")
print(split_result)
print("Split_result length is: ", len(split_result))

# def cluster_user_data(input_data, emotional_col_start=4, emotional_col_end=9, n_clusters=3):
# 	'''
# 	This function cluster user data based on KMeans algorithm
# 	By default, it will split your data into three groups
# 	'''
# 	# collect answers for five emotional questions
# 	# which are located from 4th col to 9th col
# 	emotional_data = [i[emotional_col_start:emotional_col_end] for i in input_data['user_data']]
# 	# use kmeans to cluster data
# 	kmeans = KMeans(n_clusters).fit(emotional_data)
# 	# return cluster labels
Esempio n. 5
0
def index():
    list_of_tables = [
        {
            "title": "All User Data"
        },
        {
            "title": "Young Male Data"
        },
        {
            "title": "Middle-Aged or Old Male Data"
        },
        {
            "title": "Young Female Data"
        },
        {
            "title": "Middle-Aged or Old Female Data"
        },
        {
            "title": "Young Male Data USA"
        },
        {
            "title": "Young Male Data Canada"
        },
        {
            "title": "Middle-Aged or Old Male Data USA"
        },
        {
            "title": "Middle-Aged or Old Male Data Canada"
        },
        {
            "title": "Young Female Data USA"
        },
        {
            "title": "Young Female Data Canada"
        },
        {
            "title": "Middle-Aged or Old Female Data USA"
        },
        {
            "title": "Middle-Aged or Old Female Data Canada"
        },
    ]
    #Calling function, argument is path of folder where all CSV files are stored
    create_table(UPLOAD_FOLDER)

    table_df = pd.read_sql_table('we_are_not_alone_no_nan', con=engine)
    full_data = table_df.values
    list_of_tables[0]["data"] = full_data

    young_male_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Male\""
    young_male_df = pd.read_sql(young_male_query, con=engine)
    young_male_data = young_male_df.values
    labels = util.cluster_user_data(full_data)
    list_of_tables[1]["data"] = young_male_data

    middle_aged_or_old_male_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Male\""
    middle_aged_or_old_male_df = pd.read_sql(middle_aged_or_old_male_query,
                                             con=engine)
    middle_aged_or_old_male_data = middle_aged_or_old_male_df.values
    list_of_tables[2]["data"] = middle_aged_or_old_male_data

    young_female_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Female\""
    young_female_df = pd.read_sql(young_female_query, con=engine)
    young_female_data = young_female_df.values
    list_of_tables[3]["data"] = young_female_data

    middle_aged_or_old_female_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Female\""
    middle_aged_or_old_female_df = pd.read_sql(middle_aged_or_old_female_query,
                                               con=engine)
    middle_aged_or_old_female_data = middle_aged_or_old_female_df.values
    list_of_tables[4]["data"] = middle_aged_or_old_female_data

    young_male_usa_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Male\" AND \"What country do you live in?\" = \"USA\""
    young_male_usa_df = pd.read_sql(young_male_usa_query, con=engine)
    young_male_usa_data = young_male_usa_df.values
    list_of_tables[5]["data"] = young_male_usa_data

    young_male_can_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Male\" AND \"What country do you live in?\" = \"Canada\""
    young_male_can_df = pd.read_sql(young_male_can_query, con=engine)
    young_male_can_data = young_male_can_df.values
    list_of_tables[6]["data"] = young_male_can_data

    middle_aged_or_old_male_usa_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Male\" AND \"What country do you live in?\" = \"USA\""
    middle_aged_or_old_male_usa_df = pd.read_sql(
        middle_aged_or_old_male_usa_query, con=engine)
    middle_aged_or_old_male_usa_data = middle_aged_or_old_male_usa_df.values
    list_of_tables[7]["data"] = middle_aged_or_old_male_usa_data

    middle_aged_or_old_male_can_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Male\" AND \"What country do you live in?\" = \"Canada\""
    middle_aged_or_old_male_can_df = pd.read_sql(
        middle_aged_or_old_male_can_query, con=engine)
    middle_aged_or_old_male_can_data = middle_aged_or_old_male_can_df.values
    list_of_tables[8]["data"] = middle_aged_or_old_male_can_data

    young_female_usa_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Female\" AND \"What country do you live in?\" = \"USA\""
    young_female_usa_df = pd.read_sql(young_female_usa_query, con=engine)
    young_female_usa_data = young_female_usa_df.values
    list_of_tables[9]["data"] = young_female_usa_data

    young_female_can_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" <= 35 AND \"What is your gender?\" = \"Female\" AND \"What country do you live in?\" = \"Canada\""
    young_female_can_df = pd.read_sql(young_female_can_query, con=engine)
    young_female_can_data = young_female_can_df.values
    list_of_tables[10]["data"] = young_female_can_data

    middle_aged_or_old_female_usa_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Female\" AND \"What country do you live in?\" = \"USA\""
    middle_aged_or_old_female_usa_df = pd.read_sql(
        middle_aged_or_old_female_usa_query, con=engine)
    middle_aged_or_old_female_usa_data = middle_aged_or_old_female_usa_df.values
    list_of_tables[11]["data"] = middle_aged_or_old_female_usa_data

    middle_aged_or_old_female_can_query = "select * from we_are_not_alone_no_nan where \"How old are you?\" >=36 AND \"What is your gender?\" = \"Female\" AND \"What country do you live in?\" = \"Canada\""
    middle_aged_or_old_female_can_df = pd.read_sql(
        middle_aged_or_old_female_can_query, con=engine)
    middle_aged_or_old_female_can_data = middle_aged_or_old_female_can_df.values
    list_of_tables[12]["data"] = middle_aged_or_old_female_can_data

    for item in list_of_tables[5:]:
        if len(item["data"]) > 10:

            labels = util.cluster_user_data(item["data"])

            groups = util.split_user_data(item["data"], labels)

            for i, table in enumerate(groups):
                dict = {
                    "title": "%s KMeans Split #%d" % (item["title"], i + 1),
                    "data": []
                }
                dict["data"] = table
                list_of_tables += [dict]

    return render_template('index.html',
                           labels_html=labels,
                           column_html=column_names,
                           data_html=list_of_tables)
def index():
    conn = connect()
    cur = conn.cursor()
    
    COUNTRIES = "SELECT DISTINCT country FROM not_alone;"
    cur.execute(COUNTRIES)
    countries = cur.fetchall()

    subGroups = []

    #Split data into 4 groups based on age and gender
    #group 1
    GROUP_1 = "CREATE TABLE group_1 AS (SELECT * FROM not_alone WHERE age <= 35 AND gender = 'Male');"
    
    cur.execute(GROUP_1)

    #group 2
    GROUP_2 = "CREATE TABLE group_2 AS (SELECT * FROM not_alone WHERE age >= 36 AND gender = 'Male');"

    cur.execute(GROUP_2)

    #group 3
    GROUP_3 = "CREATE TABLE group_3 AS (SELECT * FROM not_alone WHERE age <= 35 AND gender = 'Female');"

    cur.execute(GROUP_3)

    #group 4
    GROUP_4 = "CREATE TABLE group_4 AS (SELECT * FROM not_alone WHERE age >= 36 AND gender = 'Female');"

    cur.execute(GROUP_4)
    
    #split each group into smaller groups based on country and size
    for x in range(1,5):
        for row in countries:
            #drop TEMP table from pervious loop cycle
            cur.execute("DROP TABLE IF EXISTS TEMP;")

            #create table to store current subgroup in
            group = "GROUP_" + str(x)
            country = row[0]
            TEMP = "CREATE TABLE TEMP AS (SELECT * FROM " + group + " WHERE country = '" + country + "');"
            TEMPROWS = "SELECT * FROM TEMP;"
            cur.execute(TEMP)

            #get number of rows in table, to make sure subgroup is not empty
            cur.execute("SELECT COUNT(*) FROM TEMP;")
            rowCount = cur.fetchone()[0]

            #check if subgroup is empty
            if rowCount > 0:
                #if current country has more than 10 entries, split using kmeans
                cur.execute(TEMPROWS)
                subGroup = cur.fetchall()
                if rowCount >= 10:
                    labels = util.cluster_user_data(subGroup)
        
                    splitGroup = util.split_user_data(subGroup, labels)

                    for y in splitGroup:
                         subGroups.append(y)
                else:
                    subGroups.append(subGroup)

    #pass groups to index.html
    return render_template('index.html', subGroups = subGroups, countries = countries, column_html = column_names)
Esempio n. 7
0
def query_survey_results4(country_name=''):

    country, country_result = util.country_group3()
    result = util.group3()
    labels = util.cluster_user_data(result)
    x1 = []

    len_cluster = 3
    j1 = -1
    country1 = []
    for j in range(len(country)):
        country_item = country[j]

        if len(country_result[country_item]) > 10:
            country1.append(country[j])
            result = util.group3()
            labels = util.cluster_user_data(country_result[country_item])
            j1 = j1 + 1
            result1 = util.split_user_data(country_result[country_item],
                                           labels)
            for i in range(3):
                #print(i)
                #print(j)
                x1.append([])
                x1[j1].append(result1[i])
                # print(j1)
                # print(i)
                # print(x1[j1][i])
    group3 = x1
    #print(group3)

    country, country_result = util.country_group1()
    #print(country_result)
    result = util.group1()
    labels = util.cluster_user_data(result)
    x1 = []

    len_cluster = 3
    j1 = -1
    country1 = []
    for j in range(len(country)):
        country_item = country[j]

        if len(country_result[country_item]) > 10:
            country1.append(country[j])
            result = util.group1()
            labels = util.cluster_user_data(country_result[country_item])
            j1 = j1 + 1
            result1 = util.split_user_data(country_result[country_item],
                                           labels)
            for i in range(3):
                #print(i)
                #print(j)
                x1.append([])
                x1[j1].append(result1[i])
    group1 = x1
    #print(group1)
    country, country_result = util.country_group2()
    result = util.group2()
    labels = util.cluster_user_data(result)
    x1 = []

    len_cluster = 3
    j1 = -1
    country1 = []
    for j in range(len(country)):
        country_item = country[j]

        if len(country_result[country_item]) > 10:
            country1.append(country[j])
            result = util.group2()
            labels = util.cluster_user_data(country_result[country_item])
            j1 = j1 + 1
            result1 = util.split_user_data(country_result[country_item],
                                           labels)
            for i in range(3):
                #print(i)
                #print(j)
                x1.append([])
                x1[j1].append(result1[i])

    group2 = x1
    #print(group2)
    country, country_result = util.country_group4()
    result = util.group4()
    labels = util.cluster_user_data(result)
    x1 = []

    len_cluster = 3
    j1 = -1
    country1 = []
    for j in range(len(country)):
        country_item = country[j]

        if len(country_result[country_item]) > 10:
            country1.append(country[j])
            result = util.group4()
            labels = util.cluster_user_data(country_result[country_item])
            j1 = j1 + 1
            result1 = util.split_user_data(country_result[country_item],
                                           labels)
            for i in range(3):
                #print(i)
                #print(j)
                x1.append([])
                x1[j1].append(result1[i])
                # print(j1)
                # print(i)
                # print(x1[j1][i])

    group4 = x1
    #print(group4)
    dictionary_usa = {'country': ['Unites States Of America']}
    dictionary_romania = {'country': ['Romania']}
    dictionary_switzerland = {'country': ['Switzerland']}
    for i in range(2):
        for j in range(3):
            if (group1[i][j][1][1] == 'USA'):
                dictionary_usa['group1' + 'cluster' + str(j)] = [group1[i][j]]
            else:
                dictionary_romania['group1' + 'cluster' +
                                   str(j)] = [group1[i][j]]

    for i in range(1):
        for j in range(3):
            if (group2[i][j][1][1] == 'USA'):
                dictionary_usa['group2' + 'cluster' + str(j)] = [group2[i][j]]
            else:
                dictionary_romania['group2' + 'cluster' +
                                   str(j)] = [group2[i][j]]

    for i in range(3):
        for j in range(3):
            if (group3[i][j][1][1] == 'USA'):
                dictionary_usa['group3' + 'cluster' + str(j)] = [group3[i][j]]
            else:
                if (group3[i][j][1][1] == 'Switzerland'):
                    dictionary_switzerland['group3' + 'cluster' +
                                           str(j)] = [group3[i][j]]
                else:
                    dictionary_romania['group3' + 'cluster' +
                                       str(j)] = [group3[i][j]]

    for i in range(2):
        for j in range(3):
            if (group4[i][j][1][1] == 'USA'):
                dictionary_usa['group4' + 'cluster' + str(j)] = [group4[i][j]]
            else:
                dictionary_romania['group4' + 'cluster' +
                                   str(j)] = [group4[i][j]]
    dictionary_usa = str(dictionary_usa)
    dictionary_usa = dictionary_usa.replace('(', '[')
    dictionary_usa = dictionary_usa.replace(')', ']')
    dictionary_usa = eval(dictionary_usa)

    dictionary_romania = str(dictionary_romania)
    dictionary_romania = dictionary_romania.replace('(', '[')
    dictionary_romania = dictionary_romania.replace(')', ']')
    dictionary_romania = eval(dictionary_romania)

    dictionary_switzerland = str(dictionary_switzerland)
    dictionary_switzerland = dictionary_switzerland.replace('(', '[')
    dictionary_switzerland = dictionary_switzerland.replace(')', ']')
    dictionary_switzerland = eval(dictionary_switzerland)
    #print(group1[0][0])
    return json.dumps(dictionary_romania)