def main(df, args):
    """Generate the category similarity report and the pcset difference chart file.

    Both parameters are accepted but not used: the sonorities table is always
    loaded from a fixed HDF file, so whatever is passed as ``df`` is ignored.

    NOTE(review): the visible source defines another ``main`` right after this
    one; if both live in the same module the later definition wins -- confirm.

    Args:
        df: Ignored; the data is read from disk instead.
        args: Unused.
    """
    hdf_path = 'data/sonorities/sonorities-by-measure.hd5'
    sonorities = pandas.read_hdf(hdf_path, "sonorities")

    report_similarity_by_categories(sonorities)
    # The per-year report is currently switched off:
    # report_similarities_by_year(sonorities)

    # The pcset comparison works from the music module's table, not from
    # the sonorities loaded above.
    pcset_table = music.get_pcset_table()
    difference_charts = report_pcset_similarity_difference(pcset_table)
    reports.make_json_file(difference_charts, "categories-pcset-difference")
def main(df, args):
    """Build the sonority reports for the Haydn, Mozart and Beethoven quartets.

    Four JSON chart files are written through ``reports.make_json_file``:

    * ``sonorities-amount-timeline``
    * ``sonorities-distance-frequency-heatmap``
    * ``sonorities-distance-frequency-distribution``
    * ``sonorities-distance``

    Finally ``cluster_to_csv`` is called to produce the cluster table.

    Args:
        df: Table that is filtered with a boolean mask on its ``"composer"``
            column (presumably a pandas DataFrame -- confirm with the caller).
        args: Unused.
    """
    method = "interval_vector"
    ks_p = 0.01  # "p" handed to _ks_test (KS)
    dbscan_eps = 0.05  # eps handed to cluster_to_csv (DBSCAN)

    def _heatmap(data, title):
        # One BioHeatMap chart description; every heatmap shares these options.
        return {
            "data": data,
            "chart-type": "BioHeatMap",
            "options": {
                "title": title,
                "mapWidth": 700,
                "mapHeight": 500,
                # bioheatmap options
                "numberOfColors": 100,
                "startColor": {"r": 255, "g": 255, "b": 255, "a": 1},
                "endColor": {"r": 0, "g": 128, "b": 255, "a": 1},
                # Real booleans: the string "false" is serialized as a
                # non-empty JSON string, which does not behave as false for
                # the (presumably JavaScript) chart consumer.
                "passThroughBlack": False,
                "drawBorder": False,
            },
        }

    def _axis_chart(data, chart_type, title, h_title, v_title):
        # One chart description with a title and titled horizontal/vertical axes.
        return {
            "data": data,
            "chart-type": chart_type,
            "options": {
                "title": title,
                "hAxis": {"title": h_title},
                "vAxis": {"title": v_title},
            },
        }

    h = df[df["composer"] == "haydn"]
    m = df[df["composer"] == "mozart"]
    b = df[df["composer"] == "beethoven"]
    hmb = filter_classical_composers(df)

    freq_df = get_frequency(hmb, method)
    dist_df = frequency_matrix(freq_df, _distance)
    ks_df = frequency_matrix(freq_df, _ks_test, {"p": ks_p})

    # Every per-composer chart is produced for the same four subsets, in this
    # order; the label is completed with " quartets" or " sonorities".
    corpora = [
        ("Haydn", h),
        ("Mozart", m),
        ("Beethoven", b),
        ("Haydn, Mozart and Beethoven", hmb),
    ]

    # Amount
    amount_args = [
        _chart(
            different_sonorities_amount(subset, method).to_javascript(),
            label + " quartets",
        )
        for label, subset in corpora
    ]

    # Distance distribution
    dist_mean = dist_df.mean()
    dist_var = SpecialDataFrame(dist_df.var(), columns=["Distance Variance"])
    # z-score of the per-column mean distance
    normalized_dist_mean = SpecialDataFrame(
        (dist_mean - dist_mean.mean()) / dist_mean.std(),
        columns=["Distance Mean"],
    )
    histogram_dist = SpecialDataFrame(dist_df.sum(), columns=["Euclidean Distance"])
    ks_sum = SpecialDataFrame(ks_df.sum(), columns=["False KS Sum"])

    distance_heatmap_args = [
        _heatmap(dist_df.to_javascript(), "Distance between quartets"),
        # NOTE(review): the meaning of the positional True is defined by
        # to_javascript, which is not visible here.
        _heatmap(ks_df.to_javascript(True), "Kolmogorov-Smirnov test between quartets"),
    ]

    distance_distribution_args = [
        _axis_chart(
            normalized_dist_mean.to_javascript(),
            "ColumnChart",
            "Normalized mean distance to quartets",
            "Quartet name",
            "Euclidean distance (normalized by standard deviation)",
        ),
        _axis_chart(
            histogram_dist.to_javascript(),
            "Histogram",
            "Distance sum to quartets (histogram)",
            "Euclidean distance (sum)",
            "Number of quartets",
        ),
        _axis_chart(
            dist_var.to_javascript(),
            "ColumnChart",
            "Distance variance to quartets",
            "Quartet name",
            "Euclidean distance variance",
        ),
        _axis_chart(
            dist_var.to_javascript(),
            "Histogram",
            "Distance variance to quartets (histogram)",
            "Euclidean distance (variance)",
            "Number of quartets",
        ),
        _axis_chart(
            ks_sum.to_javascript(),
            "ColumnChart",
            "False KS 2-sample sum",
            "Quartet",
            "Number of False KS",
        ),
        _axis_chart(
            ks_sum.to_javascript(),
            "Histogram",
            "False KS 2-sample sum (histogram)",
            "Number of False KS",
            "Number of quartets",
        ),
    ]

    distance_to_interval_vector_args = [
        _chart_distance(
            sonorities_amount_and_distance(subset).to_javascript(),
            label + " sonorities",
        )
        for label, subset in corpora
    ]

    reports.make_json_file(amount_args, "sonorities-amount-timeline")
    reports.make_json_file(distance_heatmap_args, "sonorities-distance-frequency-heatmap")
    reports.make_json_file(distance_distribution_args, "sonorities-distance-frequency-distribution")
    reports.make_json_file(distance_to_interval_vector_args, "sonorities-distance")

    # cluster table
    cluster_to_csv(freq_df.T, dist_df, hmb, dbscan_eps)