コード例 #1
0
def main():
    """
    Cluster the CEpiMatch matrix and save the dendrogram to ``img/166.png``.

    Loads ``data/CEpiMatch.csv`` through ``load_mat``, takes the element-wise
    reciprocal of the matrix, feeds the result to ``linkage`` and draws the
    resulting dendrogram with the leaves labelled by ``mat.rlabels``.

    The plot is drawn on the current pyplot figure; this function does not
    open a new one.
    """
    mat = load_mat("data/CEpiMatch.csv")
    # presumably the file holds similarity scores and the reciprocal turns
    # them into distances -- TODO confirm against the data
    mat = 1 / mat

    # NOTE(review): if ``linkage`` is scipy's, a square 2-D input is read as
    # one observation vector per row, not as a precomputed distance matrix
    # -- confirm this is the intended interpretation.
    Z = linkage(mat)

    # With orientation="right" the leaves run along the y axis, which is why
    # the y label below names the leaf identifiers.
    dendrogram(Z, labels=mat.rlabels, orientation="right", leaf_font_size=5)

    plt.title("Hierarchical clustering result")
    plt.ylabel("PDB ID")
    plt.savefig("img/166.png")
コード例 #2
0
def for_lysozyme():
    """
    Plot lysozyme-only dendrograms for several versions of the matrix.

    The version is selected by the first command-line argument
    (``sys.argv[1]``):

    * ``"upper"``   -- keep the upper triangle (entries below it are zeroed)
    * ``"lower"``   -- take the lower triangle, transposed into upper form
    * ``"average"`` -- element-wise mean of the two versions above
    * anything else -- the matrix is used unchanged

    For every file in the list below, the ``"lysozyme"`` entry of
    ``categorize(load_mat(...), data)`` is inverted element-wise (except for
    ``RMSD.csv``), transformed as selected, clustered with ``linkage`` and
    saved as ``img/for_lysozyme_<method>/<file stem>.png``.

    Raises:
        IndexError: if no command-line argument was given.
    """

    import os
    import sys

    method = sys.argv[1]

    paths = [
        "CEpiMatch.csv",
        "Multiprot.csv",
        "SPa.csv",
        "SPb.csv",
        "SPe.csv",
        "TMa.csv",
        "TMb.csv",
        "TMc.csv",
        "MATT.csv",
        "RMSD.csv",
    ]

    # savefig does not create missing directories, and this one depends on
    # the user-supplied method name, so make sure it exists up front.
    out_dir = "img/for_lysozyme_%s" % method
    os.makedirs(out_dir, exist_ok=True)

    for path in paths:
        mats = categorize(load_mat("data/%s" % path), data)

        mat = mats["lysozyme"]

        # Read the labels before any transformation: the triangle helpers
        # below may hand back plain arrays that no longer carry ``rlabels``.
        # NOTE(review): assumes row and column labels coincide (square
        # pairwise matrix), so they stay valid after the transpose -- confirm.
        labels = mat.rlabels

        # RMSD.csv is the only file that is not inverted.
        # presumably the others hold similarity scores -- TODO confirm
        if path != "RMSD.csv":
            mat = 1 / mat

        if method == "upper":
            mat = np.triu(mat)
        elif method == "lower":
            mat = np.transpose(np.tril(mat))
        elif method == "average":
            # Mean of the "upper" and "lower" versions. Adding tril and triu
            # without the transpose would merely halve the off-diagonal
            # entries instead of averaging mat[i, j] with mat[j, i].
            mat = (np.triu(mat) + np.transpose(np.tril(mat))) / 2

        fig = plt.figure()

        Z = linkage(mat)

        dendrogram(Z, labels=labels, orientation="right")

        plt.title("Hierarchical clustering for lysozyme")
        plt.ylabel("PDB ID")
        plt.savefig("%s/%s.png" % (out_dir, path.split(".")[0]))

        # One figure is opened per file; close it so they do not pile up.
        plt.close(fig)
コード例 #3
0
def compare():
    """
    Plot one hierarchical-clustering dendrogram per matrix file and category.

    Every file in the list below is loaded with ``load_mat`` and split by
    ``categorize(..., data)`` into named sub-matrices. Each sub-matrix is
    inverted element-wise (except for ``RMSD.csv``), clustered with
    ``linkage`` and saved as ``img/<category name>/<file stem>.png``.
    """

    import os

    paths = [
        "CEpiMatch.csv",
        "Multiprot.csv",
        "SPa.csv",
        "SPb.csv",
        "SPe.csv",
        "TMa.csv",
        "TMb.csv",
        "TMc.csv",
        "MATT.csv",
        "RMSD.csv",
    ]

    for path in paths:
        mats = categorize(load_mat("data/%s" % path), data)

        # The file stem is the same for every category of this file.
        type_name = path.split(".")[0]

        for name, mat in mats.items():
            # RMSD.csv is the only file that is not inverted.
            # presumably the others hold similarity scores -- TODO confirm
            if path != "RMSD.csv":
                mat = 1 / mat

            Z = linkage(mat)

            fig = plt.figure()

            dendrogram(Z, labels=mat.rlabels, orientation="right")

            plt.title("%s  %s" % (name, type_name))
            plt.ylabel("PDB ID")

            # savefig does not create missing directories.
            os.makedirs("img/%s" % name, exist_ok=True)
            plt.savefig("img/%s/%s.png" % (name, type_name))

            # A figure is opened for every (file, category) pair; close it
            # so figures do not accumulate in memory over the whole run.
            plt.close(fig)
# NOTE(review): the variable is named Q7_path but the folder is 'Smart' --
# confirm this is the intended directory.
Q7_path = os.path.join('acquired_data', 'mat_files', 'Smart')
mat_name = 'Siren_0001'

# Containers filled by the loading step below
audios = {}
sampling_rates = []

# Load data: the timestamp table, then every audio track, then the .mat pair
df_my_timestamps = load_csv(my_csv_path, csv_name)
for audio_name in audio_names:
    # sr=None is forwarded to librosa.load (per the librosa docs this keeps
    # the file's native sampling rate -- confirm); the rate returned for each
    # track is recorded alongside the samples.
    audios[audio_name], sr = librosa.load(
        os.path.join(audio_path, audio_name + '.wav'), sr=None)
    sampling_rates.append(sr)
mic_mat, siren_mat = load_mat(
    mic_path=smart_path, siren_path=Q7_path, mat_name=mat_name)

# Report the shape of each loaded audio track
for name, audio in audios.items():
    print(name, audio.shape, sep='\n')

# Report the shapes of the time fields of the microphone recording
for time_field in ('Data1_GPS_Time_msec___Time_in_M',
                   'Data1_GPS_Time_Week___Start_0h_',
                   'Data1_INS_Time_msec___Time_in_M',
                   'Data1_INS_Time_Week___Weeks__St'):
    print(mic_mat[time_field].shape)

# Report the shapes of the time fields of the siren recording
for time_field in ('Data1_GPS_Time_msec___Time_in_M',
                   'Data1_GPS_Time_Week___Start_0h_',
                   'Data1_INS_Time_msec___Time_in_M'):
    print(siren_mat[time_field].shape)