Ejemplo n.º 1
0
def main():
    """Draw one histogram per feature of ``dataset_train.csv`` in a single figure.

    The identification columns and the house label are dropped, ``Best Hand``
    is encoded as 1 (Left) / 2 (Right), and ``Birthday`` is split into
    ``Year``, ``Month`` and ``Day`` before being replaced by a timestamp
    computed with ``time.mktime``.
    """
    frame = utils.dataframe('dataset_train.csv')
    palette = utils.colors()

    frame = frame.drop(
        columns=['Index', 'Hogwarts House', 'First Name', 'Last Name'])
    frame['Best Hand'] = frame['Best Hand'].replace(
        to_replace=['Left', 'Right'], value=[1, 2])

    # Birthday strings are sliced positionally as "YYYY-MM-DD".
    birthdays = frame['Birthday']
    frame['Year'] = birthdays.apply(lambda date: int(date[0:4]))
    frame['Month'] = birthdays.apply(lambda date: int(date[5:7]))
    frame['Day'] = birthdays.apply(lambda date: int(date[8:10]))
    frame['Birthday'] = birthdays.apply(lambda date: time.mktime(
        datetime.datetime.strptime(date, "%Y-%m-%d").timetuple()))

    # Four rows; the column count is deliberately rounded up with one spare.
    feature_count = len(frame.columns)
    grid_columns = math.ceil(feature_count / 4 + 1)

    for position, feature in enumerate(frame, start=1):
        values = frame[feature]
        plt.subplot(4, grid_columns, position)
        # NOTE(review): `bin` is expected to be a helper defined elsewhere in
        # this file that shadows the builtin; the builtin bin() would reject a
        # Series — confirm.
        plt.hist(values,
                 bins=bin(values),
                 color=palette[feature],
                 label=feature,
                 alpha=0.75,
                 edgecolor='black',
                 linewidth=0.5)
        plt.legend(loc='upper right')
    plt.show()
Ejemplo n.º 2
0
def main():
	"""Show a seaborn pair plot of ``dataset_train.csv`` coloured by house."""
	frame = utils.dataframe('dataset_train.csv')
	house_colors = utils.colors_house()

	unused_columns = ['Index', 'First Name', 'Last Name', 'Birthday', 'Best Hand']
	frame = frame.drop(columns=unused_columns)

	# The palette is an ordered list, so the house order below is significant.
	house_order = ('Ravenclaw', 'Slytherin', 'Gryffindor', 'Hufflepuff')
	palette = [house_colors[house] for house in house_order]
	sns.set_palette(sns.color_palette(palette))
	sns.pairplot(frame, hue="Hogwarts House")

	plt.show()
Ejemplo n.º 3
0
def main():
    """Scatter every pair of four selected courses, coloured by Hogwarts house.

    Reads ``dataset_train.csv``, builds all 2-combinations of the courses
    listed in ``courses`` and draws each pair in its own subplot of a
    three-column grid on a dark background.
    """
    #matplotlib.use('webagg')
    courses = [
        'Astronomy',
        'Herbology',
        'Ancient Runes',
        'Divination',
    ]
    plt.style.use('dark_background')

    house_palette = utils.colors_house()

    frame = utils.dataframe('dataset_train.csv')
    # The returned value is not used below; the call is kept because its side
    # effects (if any) are not visible from here — TODO confirm it can go.
    feature_colors = utils.colors()

    # Captured before the drop; the drop does not touch this column.
    houses = frame['Hogwarts House']
    dropped = [
        'Index',
        'First Name',
        'Last Name',
        'Arithmancy',
        'Defense Against the Dark Arts',
        'Muggle Studies',
        'History of Magic',
        'Potions',
        'Care of Magical Creatures',
        'Flying',
        'Charms',
        'Transfiguration',
    ]
    frame = frame.drop(columns=dropped)

    pairs = list(itertools.combinations(courses, 2))
    pair_count = len(pairs)

    for position, (first, second) in enumerate(pairs, start=1):
        plt.subplot(math.ceil(pair_count / 3 + 1), 3, position)
        plt.scatter(frame[first],
                    frame[second],
                    c=houses.map(house_palette),
                    alpha=0.25,
                    # Names are cut to 10 characters to keep the legend small.
                    label=[first[:10], second[:10]],
                    marker='o',
                    s=2)
        plt.xticks([])
        plt.yticks([])
        plt.legend(loc='upper right', fontsize=5)
    plt.show()
Ejemplo n.º 4
0
def main():
    """Compute describe-style statistics for the numeric columns of a CSV file.

    The CSV path is taken from ``sys.argv[1]``. Columns whose dtype is ``str``
    or ``object`` are skipped. For every remaining column a dict with the keys
    ``count``, ``mean``, ``std``, ``min``, ``25%``, ``50%``, ``75%``, ``max``
    and ``total`` is filled in, and the mapping ``{column: stats}`` is handed
    to ``describe``.

    Exits with status 1 when the script is not called with exactly one
    argument.
    """
    if len(sys.argv) != 2:
        print('\033[91m' + '✘ Error: ' + '\033[0m' +
              'CSV file is missing, please add his path as argument')
        # A usage error must not report success to the calling shell.
        sys.exit(1)
    df = utils.dataframe(sys.argv[1])

    dico_numerals = {}
    for label in df.columns:
        column = df[label]
        if column.dtypes == str or column.dtypes == object:
            continue

        stats = {
            'count': 0,
            'mean': 0,
            'std': 0,
            'min': float('inf'),
            '25%': 0,
            '50%': 0,
            '75%': 0,
            'max': float('-inf'),
            'total': 0
        }
        dico_numerals[label] = stats

        # Walk the column itself instead of DataFrame.iterrows(), which
        # rebuilt a Series for every row of the frame once per column.
        for value in column:
            # Comparisons with NaN are False, so missing values never become
            # the min or the max.
            if value > stats['max']:
                stats['max'] = value
            if value < stats['min']:
                stats['min'] = value
            if not np.isnan(value):
                stats['count'] += 1
                stats['total'] += value

        if stats['count'] > 0:
            valid = column.dropna()
            stats['mean'] = stats['total'] / stats['count']
            stats['25%'] = utils.calc_quantile(valid, 0.25)
            stats['50%'] = utils.calc_quantile(valid, 0.5)
            stats['75%'] = utils.calc_quantile(valid, 0.75)
            stats['std'] = utils.stdev(valid)

    describe(dico_numerals)
Ejemplo n.º 5
0
def main():
	"""Scatter every pair of features of ``dataset_train.csv`` in one figure.

	``Best Hand`` is encoded as 1 (Left) / 2 (Right) and ``Birthday`` is split
	into ``Year``, ``Month`` and ``Day`` before being replaced by a timestamp
	computed with ``time.mktime``. Each subplot is coloured with the blend of
	the two features' colours returned by ``utils.combine_hex_values``.
	"""
	features = [
		"Birthday",
		"Best Hand",
		"Arithmancy",
		"Astronomy",
		"Herbology",
		"Defense Against the Dark Arts",
		"Divination",
		"Muggle Studies",
		"Ancient Runes",
		"History of Magic",
		"Transfiguration",
		"Potions",
		"Care of Magical Creatures",
		"Charms",
		"Flying",
		"Year",
		"Month",
		"Day"
	]

	frame = utils.dataframe('dataset_train.csv')
	palette = utils.colors()

	frame = frame.drop(columns=['Index', 'Hogwarts House', 'First Name', 'Last Name'])
	frame['Best Hand'] = frame['Best Hand'].replace(to_replace=['Left', 'Right'], value=[1, 2])

	# Birthday strings are sliced positionally as "YYYY-MM-DD".
	birthdays = frame['Birthday']
	frame['Year'] = birthdays.apply(lambda date: int(date[0:4]))
	frame['Month'] = birthdays.apply(lambda date: int(date[5:7]))
	frame['Day'] = birthdays.apply(lambda date: int(date[8:10]))
	frame['Birthday'] = birthdays.apply(
		lambda date: time.mktime(datetime.datetime.strptime(date, "%Y-%m-%d").timetuple()))

	pairs = list(itertools.combinations(features, 2))
	pair_count = len(pairs)

	for position, (first, second) in enumerate(pairs, start=1):
		# Seven rows; the column count is rounded up with one spare.
		plt.subplot(7, math.ceil(pair_count / 7 + 1), position)
		blended = utils.combine_hex_values(palette[first], palette[second])
		plt.scatter(frame[first], frame[second], color=blended, alpha=0.25, label=[first, second], s=3)
		plt.xticks([])
		plt.yticks([])
		plt.legend(loc='upper right', fontsize=5)
	plt.show()
Ejemplo n.º 6
0
def main():
	"""Compute describe-style statistics for the non-numeric columns of a CSV file.

	The CSV path is taken from ``sys.argv[1]``. Only columns whose dtype is
	``str`` or ``object`` are processed. For each of them a dict is built with:

	- ``count``: number of values that are not a float NaN,
	- ``unique``: number of distinct values,
	- ``top``: the most frequent value (the first one seen on ties),
	- ``freq``: how many times ``top`` occurs,
	- ``all``: the full ``{value: occurrences}`` mapping.

	``top`` and ``freq`` stay at 0 for a column without any value. The mapping
	``{column: stats}`` is handed to ``describe``.

	Exits with status 1 when the script is not called with exactly one
	argument.
	"""
	if len(sys.argv) != 2:
		print('\033[91m' + '✘ Error: ' + '\033[0m' + 'CSV file is missing, please add his path as argument')
		# A usage error must not report success to the calling shell.
		sys.exit(1)
	df = utils.dataframe(sys.argv[1])

	dico_objects = {}
	for label in df.columns:
		column = df[label]
		if not (column.dtypes == str or column.dtypes == object):
			continue

		# Walk the column itself instead of DataFrame.iterrows(), which
		# rebuilt a Series for every row of the frame once per column.
		occurrences = {}
		count = 0
		for value in column:
			if isinstance(value, float) and np.isnan(value):
				continue
			count += 1
			occurrences[value] = occurrences.get(value, 0) + 1

		top, freq = 0, 0
		if occurrences:
			# max() returns the first maximal key in insertion order, which
			# matches a strict ">" scan over the same dict.
			top = max(occurrences, key=occurrences.get)
			freq = occurrences[top]

		dico_objects[label] = {
			'count': count,
			'unique': len(occurrences),
			'top': top,
			'freq': freq,
			'all': occurrences
		}

	describe(dico_objects)
Ejemplo n.º 7
0
def main():
    """Pair-plot birthday, handedness and name-derived features by house.

    Reads ``dataset_train.csv`` and builds a frame containing the house label,
    ``Best Hand`` encoded as 1 (Left) / 2 (Right), the ``Year`` / ``Month`` /
    ``Day`` parts of ``Birthday``, ``Birthday`` as a ``time.mktime`` timestamp,
    and, for both first and last names, the length, the vowel count (via
    ``count_vowel``) and the consonant count (length minus vowel count).
    The frame is shown with ``sns.pairplot`` using ``Hogwarts House`` as hue.
    """
    b_df = utils.dataframe('dataset_train.csv')
    house_colors = utils.colors_house()
    # The palette is an ordered list, so the house order is significant.
    palette = [
        house_colors['Ravenclaw'], house_colors['Slytherin'],
        house_colors['Gryffindor'], house_colors['Hufflepuff']
    ]
    sns.set_palette(sns.color_palette(palette))

    # Birthday strings are sliced positionally as "YYYY-MM-DD".
    birthdays = b_df['Birthday']
    first_names = b_df['First Name']
    last_names = b_df['Last Name']

    # Each derived column is computed exactly once; the dict order fixes the
    # column order of the plotted frame.
    df = pd.DataFrame({
        'Hogwarts House': b_df['Hogwarts House'],
        'Best Hand': b_df['Best Hand'].replace(to_replace=['Left', 'Right'],
                                               value=[1, 2]),
        'Year': birthdays.apply(lambda x: int(x[0:4])),
        'Month': birthdays.apply(lambda x: int(x[5:7])),
        'Day': birthdays.apply(lambda x: int(x[8:10])),
        'Birthday': birthdays.apply(lambda x: time.mktime(
            datetime.datetime.strptime(x, "%Y-%m-%d").timetuple())),
        'First Name': first_names.apply(len),
        'Last Name': last_names.apply(len),
        'Last Name Vowel': last_names.apply(lambda x: int(count_vowel(x))),
        'First Name Vowel': first_names.apply(lambda x: int(count_vowel(x))),
        'Last Name Consonant': last_names.apply(
            lambda x: int(len(x) - count_vowel(x))),
        'First Name Consonant': first_names.apply(
            lambda x: int(len(x) - count_vowel(x))),
    })

    sns.pairplot(df, hue="Hogwarts House")
    plt.show()
Ejemplo n.º 8
0
def main():
    """Scatter every pair of four selected courses and highlight chosen rows.

    Reads ``dataset_train.csv``, builds all 2-combinations of the courses in
    ``iterable`` and draws each pair in its own subplot of a three-column grid
    on a dark background, coloured by ``Hogwarts House``. The rows whose index
    labels are listed in ``error_rows`` are drawn a second time with an ``x``
    marker. A blank line is printed after the figure is closed.
    """
    #matplotlib.use('webagg')
    # Course names noted by the author (presumably the candidates for
    # `iterable` — TODO confirm):
    #   'Arithmancy', 'Astronomy', 'Herbology',
    #   'Defense Against the Dark Arts', 'Divination', 'Muggle Studies',
    #   'Ancient Runes', 'History of Magic', 'Transfiguration', 'Potions',
    #   'Care of Magical Creatures', 'Charms', 'Flying'
    iterable = [
        'Astronomy',
        'Herbology',
        'Ancient Runes',
        'Divination',
    ]
    plt.style.use('dark_background')

    colors_house = utils.colors_house()

    df = utils.dataframe('dataset_train.csv')
    # The returned value is not used below; the call is kept because its side
    # effects (if any) are not visible from here — TODO confirm it can go.
    colors = utils.colors()

    # Index labels of the rows to highlight.
    # NOTE(review): the name `df_errors` suggests misclassified samples —
    # confirm where this list comes from.
    error_rows = [
        184, 200, 255, 339, 443, 445, 456, 504, 515, 618, 681, 704, 815,
        820, 824, 915, 941, 1078, 1098, 1113, 1191, 1282, 1419, 1435, 1444,
        1446, 1448, 1515, 1525, 1596
    ]
    # Neither selection depends on the plotted pair, so both are built once
    # instead of once per subplot.
    df_errors = df.loc[error_rows]
    house_colors = df['Hogwarts House'].map(colors_house)
    error_colors = df_errors['Hogwarts House'].map(colors_house)

    allIterables = list(itertools.combinations(iterable, 2))
    nb_rows = math.ceil(len(allIterables) / 3 + 1)

    for i, el in enumerate(allIterables, start=1):
        plt.subplot(nb_rows, 3, i)
        plt.scatter(df[el[0]],
                    df[el[1]],
                    c=house_colors,
                    alpha=0.25,
                    # Names are cut to 10 characters to keep the legend small.
                    label=[el[0][:10], el[1][:10]],
                    marker='o',
                    s=2)
        plt.scatter(df_errors[el[0]],
                    df_errors[el[1]],
                    c=error_colors,
                    marker='x')
        plt.xticks([])
        plt.yticks([])
        plt.legend(loc='upper right', fontsize=5)
    plt.show()

    print()