Python CowsDatasetの例、Welfare_AI_dataset.CowsDataset Pythonの例

コード例 #1

0

ファイルを表示

 def create_generated_cow_data(dataset, sheet_index, n_samples):
     """
     Generate synthetic copies of every column (joint) of one sheet (cow).

     For each of the ``n_samples`` copies, every item obtained by iterating
     ``dataset[sheet_index]`` is passed to ``GeneratedData.generate_samples``
     and the results are assembled into one DataFrame whose columns are
     named with ``CowsDataset.get_joint_names(dataset)``.

     :param dataset: object from class CowsDataset
     :param sheet_index: the index (beginning from 0) of the sheet of the excel file
     :param n_samples: the number of synthetic samples to generate
     :return: tuple ``(new_dataset, sheets)`` -- the list of generated
         DataFrames and the list of their sheet names (one entry per sample)
     """
     # Every generated sample carries the name of the cow it was derived from.
     file_name = CowsDataset.get_cow_names()[sheet_index]
     # No Excel writer is opened here: nothing in this function writes to a
     # workbook, so creating one only leaked an unclosed file handle.
     sheets = []
     new_dataset = []
     for _ in range(n_samples):
         sheets.append(file_name)
         generated_data = [
             GeneratedData.generate_samples(dataset, sheet_index, column)
             for column in dataset[sheet_index]
         ]
         # One generated sequence per joint: transpose so that each joint
         # becomes a column of the frame.
         df = pd.DataFrame(generated_data).T
         df.columns = CowsDataset.get_joint_names(dataset)
         new_dataset.append(df)
     return new_dataset, sheets

コード例 #2

0

ファイルを表示

 def create_super_dissimilarity_matrix(dataset):
     """
     Combine the per-joint Pearson matrices into a single matrix.

     ``Hierarchical_clustering.pearson_correlation`` is called once for every
     index of the list returned by ``CowsDataset.get_list_of_joints`` and the
     collected results are handed to
     ``Hierarchical_clustering.create_mean_matrix``.

     :param dataset: object from class CowsDataset
     :return: whatever ``create_mean_matrix`` returns for the collected list
     """
     joints = CowsDataset.get_list_of_joints(dataset)
     per_joint = [
         Hierarchical_clustering.pearson_correlation(joints, idx)
         for idx, _ in enumerate(joints)
     ]
     return Hierarchical_clustering.create_mean_matrix(per_joint)

コード例 #3

0

ファイルを表示

 def plot_cosine_trees(dataset):
     """
     Draw one average-linkage dendrogram per joint from the cosine matrices.

     For every index of the list returned by
     ``CowsDataset.get_list_of_joints`` a new figure is created, the result
     of ``Hierarchical_clustering.cosine_correlation`` is clustered with
     ``linkage(..., method='average')`` and plotted with the cow names as
     leaf labels. Figures are created but neither shown nor saved here.

     :param dataset: object from class CowsDataset (its ``sheet_names``
         attribute supplies the leaf labels)
     :return: None
     """
     list_of_joints = CowsDataset.get_list_of_joints(dataset)
     # Leaf labels and joint names are the same for every figure, so they
     # are computed once instead of on every iteration.
     cow_names = [
         CowsDataset.get_cow_name(sheet) for sheet in dataset.sheet_names
     ]
     joints = CowsDataset.get_joint_names(dataset)
     for i, _ in enumerate(list_of_joints):
         dissimilarity_cosine = Hierarchical_clustering.cosine_correlation(
             list_of_joints, i)
         # squareform converts between square and condensed layouts;
         # presumably the helper returns a square matrix -- TODO confirm.
         dissimilarity_cosine = ssd.squareform(dissimilarity_cosine,
                                               checks=False)
         hierarchy_cosine = linkage(dissimilarity_cosine, method='average')
         _, ax = plt.subplots(1, 1)
         dendrogram(hierarchy_cosine,
                    ax=ax,
                    labels=cow_names,
                    leaf_rotation=90,
                    leaf_font_size=10)
         ax.set_title('Dendrogram Cosine' + joints[i])

コード例 #4

0

ファイルを表示

 def get_labels_clustering_corr(dataset):
     """
     Split the data into two flat clusters using the correlation matrix.

     The matrix from ``Hierarchical_clustering.dissimilarity_matrix`` is
     passed through ``ssd.squareform``, clustered with average linkage and
     cut with ``fcluster(t=2, criterion='maxclust')``. The labels are
     printed and returned.

     NOTE(review): the ``return`` is inside the loop, so only the first
     entry of the joint list is ever processed, and ``None`` is returned
     when the list is empty -- confirm this is intended.

     :param dataset: object from class CowsDataset
     :return: the ``fcluster`` labels computed for index 0
     """
     joints = CowsDataset.get_list_of_joints(dataset)
     for idx, _ in enumerate(joints):
         condensed = ssd.squareform(
             Hierarchical_clustering.dissimilarity_matrix(joints, idx),
             checks=False)
         tree = linkage(condensed, method='average')
         labels = fcluster(tree, t=2, criterion='maxclust')
         print('labelsCorr : ', labels)
         return labels

コード例 #5

0

ファイルを表示

 def get_labels_clustering_cosine(dataset):
     """
     Split the data into two flat clusters using the cosine matrix.

     The matrix from ``Hierarchical_clustering.cosine_correlation`` is
     passed through ``ssd.squareform``, clustered with average linkage and
     cut with ``fcluster(t=2, criterion='maxclust')``. The labels are
     printed and returned.

     NOTE(review): the ``return`` is inside the loop, so only the first
     entry of the joint list is ever processed, and ``None`` is returned
     when the list is empty -- confirm this is intended.

     :param dataset: object from class CowsDataset
     :return: the ``fcluster`` labels computed for index 0
     """
     joints = CowsDataset.get_list_of_joints(dataset)
     for idx, _ in enumerate(joints):
         condensed = ssd.squareform(
             Hierarchical_clustering.cosine_correlation(joints, idx),
             checks=False)
         tree = linkage(condensed, method='average')
         labels = fcluster(tree, t=2, criterion='maxclust')
         print('labelsCosine : ', labels)
         return labels

コード例 #6

0

ファイルを表示

    def plot_corr_trees(dataset):
        """
        Draw one average-linkage dendrogram per joint from the correlation
        dissimilarity matrices.

        For every index of the list returned by
        ``CowsDataset.get_list_of_joints`` a new figure is created, the
        result of ``Hierarchical_clustering.dissimilarity_matrix`` is
        clustered with ``linkage(..., method='average')`` and plotted with
        the cow names as leaf labels. Figures are created but neither shown
        nor saved here.

        :param dataset: object from class CowsDataset (its ``sheet_names``
            attribute supplies the leaf labels)
        :return: None
        """
        # One figure is opened per joint; silence matplotlib's open-figure
        # warning once, before any figure is created.
        plt.rcParams.update({'figure.max_open_warning': 0})
        list_of_joints = CowsDataset.get_list_of_joints(dataset)
        # Leaf labels and joint names are the same for every figure, so
        # they are computed once instead of on every iteration.
        cow_names = [
            CowsDataset.get_cow_name(sheet) for sheet in dataset.sheet_names
        ]
        joints = CowsDataset.get_joint_names(dataset)
        for i, _ in enumerate(list_of_joints):
            dissimilarity_matrix = Hierarchical_clustering.dissimilarity_matrix(
                list_of_joints, i)
            # squareform converts between square and condensed layouts;
            # presumably the helper returns a square matrix -- TODO confirm.
            dissimilarity_matrix = ssd.squareform(dissimilarity_matrix,
                                                  checks=False)
            hierarchy_corr = linkage(dissimilarity_matrix, method='average')
            _, ax = plt.subplots(1, 1)
            dendrogram(hierarchy_corr,
                       ax=ax,
                       labels=cow_names,
                       leaf_rotation=90,
                       leaf_font_size=10)
            ax.set_title('Dendrogram Correlation' + joints[i])


#
# lenCosine = len(dissimilarityCosine)
# lenCorr = len(dissimilarityCorrelation)
# superDissimilarityCosine = sumCosine / lenCosine
# superDissimilarityCorr = sumCorr / lenCorr
# hierarchyCosine = linkage(superDissimilarityCosine, method='average')
# hierarchyCorr = linkage(superDissimilarityCorr, method='average')
# f, axes = plt.subplots(1, 2, sharey=True)
# dnCosine = dendrogram(hierarchyCosine, ax=axes[0], labels=cowNames, leaf_rotation=90, leaf_font_size=10)
# axes[0].set_ylabel('dendrogram Cosine Side2 with all the cows')
# dnCorr = dendrogram(hierarchyCorr, ax=axes[1], labels=cowNames, leaf_rotation=90, leaf_font_size=10)
# axes[1].set_ylabel('dendrogram Correlation Side2 with all the cows')
# labelsCosine = fcluster(hierarchyCosine, t=2, criterion='maxclust')
# labelsCorr = fcluster(hierarchyCorr, t=2, criterion='maxclust')
# kCos = KMeans(n_clusters=2, random_state=0).fit_predict(superDissimilarityCosine)
# kCorr = KMeans(n_clusters=2, random_state=0).fit_predict(superDissimilarityCorr)
# print(kCos)
# print(kCorr)

コード例 #7

0

ファイルを表示

    def plot_subplots_joint(dataset):
        """
        Plot every joint as a 3x3 grid of subplots, one panel per sheet.

        For each joint name a figure is created whose panel ``j`` shows
        ``dataset.remove_outliers(j, column, number=0.05, order=10)``,
        limited to the y-range given by ``Plot_dataset.get_min_scale_value``
        and ``Plot_dataset.get_max_scale_value``.
        ``Plot_dataset.plot_step_start`` is called on each panel for
        'Front_Step' (red) and 'Back_Step' (green). The figure is saved as
        ``<joint>_motionvis_.png`` under ``Plot/joints/side2`` next to this
        file and then shown.

        NOTE(review): the output directory is hard-coded to ``side2`` even
        though the side shown in the title is read from the first sheet
        name -- change it by hand when plotting side1.
        NOTE(review): the grid always has 9 panels, so ``dataset.sheet_names``
        must contain at least 9 entries.

        Parameters
        ----------
        dataset : CowsDataset
            Dataset providing ``sheet_names``, ``remove_outliers`` and
            indexing by sheet position.

        Returns
        -------
        None.

        """
        joint_names = CowsDataset.get_joint_names(dataset)
        for column in joint_names:
            fig, axes = plt.subplots(3, 3, figsize=(15, 8), sharey='col')
            side = dataset.sheet_names[0][-5:]
            title = column + '  ' + side
            fig.suptitle(title, fontsize=20)
            # Named y_min / y_max so the builtins min() and max() stay usable.
            y_min = Plot_dataset.get_min_scale_value(dataset, column)
            y_max = Plot_dataset.get_max_scale_value(dataset, column)
            for j, _ in enumerate(axes.flat):
                cow_name = CowsDataset.get_cow_name(dataset.sheet_names[j])
                # Select panel j as the current axes for the pyplot calls.
                plt.subplot(3, 3, j + 1)
                plt.plot(
                    dataset.remove_outliers(j, column, number=0.05, order=10))
                plt.ylim(y_min, y_max)
                plt.title("\n\n" + cow_name)
                plt.subplots_adjust(wspace=3)
                Plot_dataset.plot_step_start(dataset[j], 'Front_Step', 'red',
                                             7)
                Plot_dataset.plot_step_start(dataset[j], 'Back_Step', 'green',
                                             7)
            fig.tight_layout()
            image_name = column + '_motionvis_' + '.png'
            plt.gcf().savefig(
                os.path.join(os.path.dirname(__file__), 'Plot', 'joints',
                             'side2', image_name))
            plt.show()

コード例 #8

0

ファイルを表示

 def generate_samples(dataset, sheet_index, column_name, noise_ratio=0.02,
                      kernel_size=9):
     """
     Generate a new sample of a column (joint) of the data
     by adding a random value between [-2%, +2%] of each original value
     (outliers removed, rounded to 3 decimals) and median-filtering the
     result. NaN entries are kept as NaN before filtering.

     :param dataset: Object of class CowsDataset
     :param sheet_index: the index (beginning from 0) of the sheet of the excel file
     :param column_name: String The name of the joint
     :param noise_ratio: half-width of the noise interval as a fraction of
         each value's magnitude (default 0.02, i.e. +/-2%)
     :param kernel_size: window size passed to ``medfilt`` (default 9)
     :return: new synthetic data based on the real data (output of ``medfilt``)
     """
     joint_column = CowsDataset.remove_outliers(dataset,
                                                sheet_index,
                                                column_name,
                                                number=0.05,
                                                order=10)  # remove outliers
     joint_column = joint_column.round(3)  # round with 3 decimal numbers
     generated_data = []
     for value in joint_column:
         if np.isnan(value):
             generated_data.append(value)
         else:
             # Keep the bound as a float: truncating it to an integer makes
             # the noise exactly 0 for every |value| < 50 and integer-only
             # above that, which is not the documented +/-2% perturbation.
             bound = noise_ratio * abs(value)
             generated_data.append(value + random.uniform(-bound, bound))
     return medfilt(generated_data, kernel_size=kernel_size)

コード例 #9

0

ファイルを表示

    def plot_all_markers_subplots(df, sheet):
        """
        Plot the marker columns of one cow as a grid of subplots and call
        ``Plot_dataset.plot_step_start`` on each panel for 'Front_Step'
        (red) and 'Back_Step' (green).

        The first grid row holds the coordinate headers X, Y, Z, R; the
        columns at positions 2..33 of ``df`` fill the following panels.
        The figure is saved as ``<cow>_motionvis_<side>.png`` next to this
        file and then shown.

        :param df: Data frame
        :param sheet: Excel sheet name; its last five characters are used
            as the side label
        """
        n_cols = 4
        n_rows = 8
        marker_columns = df.iloc[0, 2:34].index
        fig, axes = plt.subplots(9, 4, figsize=(50, 35))
        cow_name = CowsDataset.get_cow_name(sheet)
        side = sheet[-5:]
        fig.suptitle(cow_name + '  ' + side, fontsize=60)
        # Horizontal rule across the figure at 45% of its height.
        fig.add_artist(
            plt.Line2D((0, 1), (0.45, 0.45), color="k", linewidth=7))

        # Header row: one text-only panel per coordinate.
        for position, coordinate in enumerate(['X', 'Y', 'Z', 'R'], start=1):
            plt.subplot(n_rows + 1, n_cols, position)
            plt.text(0.5,
                     0.2,
                     s=coordinate,
                     fontsize=50,
                     horizontalalignment='center',
                     verticalalignment='center')
            plt.axis('off')

        # Data panels start at slot 5, directly after the header row.
        for position, column in enumerate(marker_columns, start=5):
            plt.subplot(n_rows + 1, n_cols, position)
            plt.plot(df[column])
            Plot_dataset.plot_step_start(df, 'Front_Step', 'red', 15)
            Plot_dataset.plot_step_start(df, 'Back_Step', 'green', 15)
            plt.xlabel(' ', fontsize=30)
            plt.ylabel('\n\n' + column, fontsize=30)

        for y_pos, label in ((0.65, 'Front leg'), (0.15, 'Back leg')):
            fig.text(0, y_pos, s=label, rotation='vertical', fontsize=50)

        image_name = cow_name + '_motionvis_' + side + '.png'
        target = os.path.join(os.path.dirname(__file__), image_name)
        plt.gcf().savefig(target)
        plt.show()

コード例 #10

0

ファイルを表示

from Welfare_AI_dataset import CowsDataset
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
import pandas as pd
import math

# Excel workbooks expected next to this file.
DICTIONARY_PATH = os.path.join(os.path.dirname(__file__), 'Dictionary_Kinematics.xlsx')
DATASET_PATH = os.path.join(os.path.dirname(__file__), 'ScaledCoordinates_Post-Trial.xlsx')
# Information about the source ("mother") dataset, used to determine the size
# of the sliding window. Change both 'side2' literals below to 'side1' to
# work on the other side.
dictionary = pd.read_excel(DICTIONARY_PATH, "Video File -> Excel Tab Names")
dictionary = dictionary.to_numpy()
names = CowsDataset.get_side_sheets('side2')
real_dataset = CowsDataset(names)
window_size = CowsDataset.sliding_window(real_dataset, 'side2')

#normalize the data
def normalize(x):
    """Divide ``x`` by its maximum along dimension 0.

    :param x: tensor whose ``max(0, keepdim=True)`` returns a
        ``(values, indices)`` pair (e.g. a ``torch.Tensor``)
    :return: ``x`` divided by the kept-dimension maxima (broadcast over
        dimension 0)
    """
    column_max, _ = x.max(0, keepdim=True)
    return x / column_max


class YKDataset(Dataset):
    """Dataset whose file paths come from the 'side2' label dictionary CSV.

    NOTE(review): only the beginning of ``__init__`` is visible in this
    excerpt; ``transform`` is not used in the visible part.
    """
    def __init__(self, transform=normalize):
        # Data loading: the CSV is expected at
        # 'Generated data/labels_dic_side2.csv' next to this file.
        dic_path = os.path.join(os.path.dirname(__file__), 'Generated data', 'labels_dic_' + 'side2' + '.csv')
        pathLabels = np.loadtxt(dic_path, delimiter=',', dtype=str, skiprows=1)  # load dictionary with file paths and labels (first row skipped)
        # Column 1 holds the file paths.
        self.file_paths = pathLabels[:, 1]

コード例 #11

0

ファイルを表示

ファイル: data&labels_to_csv.py プロジェクト: bioinfoUQAM/Welfare_AI

import pandas as pd
import os
from Data_generation import GeneratedData
from tqdm import tqdm
import numpy as np
from Clustering_dataset import Hierarchical_clustering

# Excel workbook expected next to this file.
DICTIONARY_PATH = os.path.join(os.path.dirname(__file__),
                               'Dictionary_Kinematics.xlsx')
side = 'side2'  # or 'side1'
n_samples = 5  # number of synthetic samples generated per cow
n_cows = 9

dictionary = pd.read_excel(DICTIONARY_PATH, "Video File -> Excel Tab Names")
dictionary = dictionary.to_numpy()
# NOTE(review): CowsDataset is not imported in this excerpt -- confirm that
# `from Welfare_AI_dataset import CowsDataset` is present in the full file.
names = CowsDataset.get_side_sheets(side)
dataset = CowsDataset(names)
columns = CowsDataset.get_joint_names(dataset)

generated_dataset = []
new_dataset = []  # list of the new generated dataframes of the generated cows
new_sheets = []  # list of the names of the generated cows
new_targets = []  # list of the labels of the generated cows
paths = []  # list of the paths of csv generated cow files
#generate n_samples of every cow
for i, sheet in enumerate(tqdm(dataset)):
    new_dataset.append(
        GeneratedData.create_generated_cow_data(dataset, i, n_samples)[0])
    new_sheets.append(
        GeneratedData.create_generated_cow_data(dataset, i, n_samples)[1])
    new_targets.append(