Ejemplo n.º 1
0
def main():
    """Show a pair plot for the dataset named on the command line.

    The dataset path is taken from ``argv[1]`` and loaded with
    ``get_dataset``.  Only the house column and a fixed subset of course
    columns are kept; incomplete rows are removed via ``dropna()`` before the
    frame is handed to ``plot_pairplot``.

    A hint is printed and the function returns early when no path was
    supplied or when ``get_dataset`` returns ``None``.
    """
    # Guard: the dataset path is a mandatory first argument.
    if len(argv) <= 1:
        return print(
            'Please input the path to the dataset as the first argument.')

    raw_frame = get_dataset(argv[1])
    if raw_frame is None:
        return print('Please input a valid path to the dataset.')

    # Label column plus the courses selected for the pair plot.
    kept_columns = [
        'Hogwarts House',
        'Astronomy',
        'Herbology',
        'Divination',
        'Muggle Studies',
        'Ancient Runes',
        'History of Magic',
        'Transfiguration',
        'Potions',
        'Charms',
        'Flying',
    ]
    plot_pairplot(raw_frame[kept_columns].dropna())
    return
Ejemplo n.º 2
0
def main():
    """Plot histograms for the training dataset.

    Loads ``datasets/dataset_train.csv`` with ``get_dataset``, then runs the
    result through ``filter_dataframe`` and ``normalize_dataframe``.  When
    ``argv[1]`` names one of the known courses, that course is forwarded to
    ``plot_histogram``; otherwise ``plot_histogram`` is called without a
    course argument.

    Prints a message and returns early when ``get_dataset`` returns ``None``.
    """
    raw_frame = get_dataset('datasets/dataset_train.csv')
    if raw_frame is None:
        return print('Please input a valid file as first argument.')

    normalized_frame = normalize_dataframe(filter_dataframe(raw_frame))

    # Course names accepted as the optional first command-line argument.
    known_courses = [
        'Arithmancy',
        'Astronomy',
        'Herbology',
        'Defense Against the Dark Arts',
        'Divination',
        'Muggle Studies',
        'Ancient Runes',
        'History of Magic',
        'Transfiguration',
        'Potions',
        'Care of Magical Creatures',
        'Charms',
        'Flying',
    ]

    course_requested = len(argv) > 1 and argv[1] in known_courses
    if course_requested:
        plot_histogram(raw_frame, normalized_frame, argv[1])
    else:
        plot_histogram(raw_frame, normalized_frame)
    return
Ejemplo n.º 3
0
def main():
    """Draw a scatter plot of two course columns of a dataset.

    The dataset path is read from ``argv[1]``, loaded with ``get_dataset``
    and passed through ``filter_dataframe``.  When exactly three arguments
    are given and both ``argv[2]`` and ``argv[3]`` are known course names,
    those two courses are plotted; in every other case the default pair
    'Astronomy' / 'Defense Against the Dark Arts' is used.

    A hint is printed and the function returns early when no path was
    supplied or when ``get_dataset`` returns ``None``.
    """
    # Guard: the dataset path is a mandatory first argument.
    if len(argv) <= 1:
        return print(
            'Please input the path to the dataset as the first argument.')

    raw_frame = get_dataset(argv[1])
    if raw_frame is None:
        return print('Please input a valid path to the dataset.')

    filtered_frame = filter_dataframe(raw_frame)

    # Course names accepted as axis selectors on the command line.
    known_courses = [
        'Arithmancy',
        'Astronomy',
        'Herbology',
        'Defense Against the Dark Arts',
        'Divination',
        'Muggle Studies',
        'Ancient Runes',
        'History of Magic',
        'Transfiguration',
        'Potions',
        'Care of Magical Creatures',
        'Charms',
        'Flying',
    ]

    # Start from the default pair and override it only for a valid request.
    first_course = 'Astronomy'
    second_course = 'Defense Against the Dark Arts'
    if (len(argv) == 4 and argv[2] in known_courses
            and argv[3] in known_courses):
        first_course, second_course = argv[2], argv[3]

    plot_scatter(filtered_frame, first_course, second_course)
    return
Ejemplo n.º 4
0
    return test_houses


def arguments_parsing():
    """Parse the command line and return the resulting namespace.

    Two positional string arguments are required:

    * ``test``    -- the test file
    * ``weights`` -- the weights file

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (('test', 'test file'),
                               ('weights', 'weights file')):
        cli.add_argument(arg_name, type=str, help=arg_help)
    return cli.parse_args()


if __name__ == '__main__':

    # Parse the command line and load the test dataset.
    args = arguments_parsing()
    dataset, num_categories = get_dataset(args.test)
    # 'Index' is removed from the category list before counting features.
    num_categories.remove('Index')
    nb = len(num_categories)
    dataset, useless = select_right_data_to_list(dataset, num_categories)

    # Read weights: each line of the file holds comma-separated values;
    # line i, field j is stored in w[j][i], giving a (4, nb + 1) matrix.
    # Fields rejected by is_number() are stored as 0.
    try:
        with open(args.weights, 'r') as f:
            w = np.zeros((4, nb + 1))
            for i, x in enumerate(f):
                for j, n in enumerate(x.split(',')):
                    w[j][i] = float(n) if is_number(n) else 0
    except (OSError, ValueError, IndexError):
        # Best-effort fallback: an unreadable file (OSError), undecodable or
        # unparsable content (ValueError) or a file with more rows/columns
        # than the matrix holds (IndexError) yields all-zero weights.
        # Catching only these keeps KeyboardInterrupt/SystemExit and genuine
        # programming errors from being silently swallowed.
        w = np.zeros((4, nb + 1))
Ejemplo n.º 5
0
    # Argument
    parser = argparse.ArgumentParser()
    parser.add_argument('file', type=str, help='file described')
    parser.add_argument('-lf',
                        '--search',
                        help='all plot',
                        action="store_true")
    parser.add_argument('-o', '--output', type=str, help='save file')
    args = parser.parse_args()

    houses = ['Gryffindor', 'Hufflepuff', 'Ravenclaw', 'Slytherin']
    colors = ['red', 'yellow', 'blue', 'green']
    labels = ['Gr', 'Huf', 'Rav', 'Sly']

    dataset, num_categories = get_dataset(args.file)
    num_categories.remove('Index')

    p = 2
    if not args.search:
        num_categories = ['Care of Magical Creatures']
        p = 1
    plt.figure(figsize=(15, 20))
    plt.subplots_adjust(top=0.98, wspace=0.2, hspace=0.5)
    for j, c in enumerate(num_categories):
        plt.subplot(math.ceil(len(num_categories) / 2), p, j + 1)
        for i, h in enumerate(houses):
            tmp_dataset = [
                data[c] for data in dataset
                if c in data and data['Hogwarts House'] == h
            ]