def create_generated_cow_data(dataset, sheet_index, n_samples):
    """
    Generate synthetic copies of one sheet (cow).

    Every column of ``dataset[sheet_index]`` is passed through
    ``GeneratedData.generate_samples`` and the generated columns are assembled
    into one data frame; this is repeated ``n_samples`` times.

    Nothing is written to disk: the data frames are only returned. (An
    ``ExcelWriter`` used to be opened here without ever being written to or
    closed, which leaked a file handle, so it is no longer created.)

    :param dataset: object from class CowsDataset
    :param sheet_index: the index (beginning from 0) of the sheet of the excel file
    :param n_samples: the number of synthetic samples to generate
    :return: tuple ``(new_dataset, sheets)``; ``new_dataset`` is a list of
             ``n_samples`` data frames and ``sheets`` is a list of
             ``n_samples`` sheet names, each equal to the cow name
    """
    cow_name = CowsDataset.get_cow_names()[sheet_index]
    # The column labels are the same for every generated frame: look them up once.
    joint_names = CowsDataset.get_joint_names(dataset)

    new_dataset = []
    sheets = []
    for _ in range(n_samples):
        sheets.append(cow_name)
        generated_columns = [
            GeneratedData.generate_samples(dataset, sheet_index, column)
            for column in dataset[sheet_index]
        ]
        # One generated series per row, so transpose to get one per column.
        df = pd.DataFrame(generated_columns).T
        df.columns = joint_names
        new_dataset.append(df)
    return new_dataset, sheets
def create_super_dissimilarity_matrix(dataset):
    """
    Combine the per-joint Pearson matrices of a dataset into a single matrix.

    ``Hierarchical_clustering.pearson_correlation`` is evaluated once for
    every entry of the joint list, and the collected results are handed to
    ``Hierarchical_clustering.create_mean_matrix``.

    :param dataset: object from class CowsDataset
    :return: whatever ``create_mean_matrix`` returns for the list of
             per-joint matrices (presumably their element-wise mean --
             confirm against Hierarchical_clustering)
    """
    joints = CowsDataset.get_list_of_joints(dataset)
    per_joint_matrices = [
        Hierarchical_clustering.pearson_correlation(joints, joint_index)
        for joint_index, _ in enumerate(joints)
    ]
    return Hierarchical_clustering.create_mean_matrix(per_joint_matrices)
def plot_cosine_trees(dataset):
    """
    Draw one dendrogram per joint from the cosine dissimilarity between cows.

    For every joint a new figure is created, the matrix returned by
    ``Hierarchical_clustering.cosine_correlation`` is passed through
    ``squareform`` and clustered with average linkage, and the resulting tree
    is drawn with the cow names as leaf labels. The figures are neither shown
    nor saved here.

    :param dataset: object from class CowsDataset (its ``sheet_names`` are
                    used to build the leaf labels)
    :return: None
    """
    list_of_joints = CowsDataset.get_list_of_joints(dataset)
    # Leaf labels and joint names do not depend on the joint being plotted,
    # so they are built once instead of once per figure.
    cow_names = [
        CowsDataset.get_cow_name(sheet) for sheet in dataset.sheet_names
    ]
    joint_names = CowsDataset.get_joint_names(dataset)

    for joint_index, _ in enumerate(list_of_joints):
        dissimilarity = Hierarchical_clustering.cosine_correlation(
            list_of_joints, joint_index)
        # linkage() works on the condensed (vector) form of a distance
        # matrix; presumably the helper returns the square form -- confirm.
        condensed = ssd.squareform(dissimilarity, checks=False)
        hierarchy = linkage(condensed, method='average')

        _, ax = plt.subplots(1, 1)
        dendrogram(hierarchy,
                   ax=ax,
                   labels=cow_names,
                   leaf_rotation=90,
                   leaf_font_size=10)
        ax.set_title('Dendrogram Cosine' + joint_names[joint_index])
def get_labels_clustering_corr(dataset):
    """
    Split the cows into two clusters per joint using the correlation
    dissimilarity, printing the labels of every joint.

    For each joint the matrix from
    ``Hierarchical_clustering.dissimilarity_matrix`` is passed through
    ``squareform``, clustered with average linkage and cut into at most two
    flat clusters.

    NOTE: the labels are printed for every joint, but only those of the LAST
    joint are returned; earlier results are overwritten.

    :param dataset: object from class CowsDataset
    :return: flat-cluster labels of the last joint, or ``None`` when the
             dataset has no joints (previously this case raised
             ``UnboundLocalError``)
    """
    list_of_joints = CowsDataset.get_list_of_joints(dataset)
    labels_corr = None  # stays None when there is nothing to cluster
    for joint_index, _ in enumerate(list_of_joints):
        dissimilarity = Hierarchical_clustering.dissimilarity_matrix(
            list_of_joints, joint_index)
        # linkage() works on the condensed (vector) form of a distance
        # matrix; presumably the helper returns the square form -- confirm.
        condensed = ssd.squareform(dissimilarity, checks=False)
        hierarchy = linkage(condensed, method='average')
        labels_corr = fcluster(hierarchy, t=2, criterion='maxclust')
        print('labelsCorr : ', labels_corr)
    return labels_corr
def get_labels_clustering_cosine(dataset):
    """
    Split the cows into two clusters per joint using the cosine
    dissimilarity, printing the labels of every joint.

    For each joint the matrix from
    ``Hierarchical_clustering.cosine_correlation`` is passed through
    ``squareform``, clustered with average linkage and cut into at most two
    flat clusters.

    NOTE: the labels are printed for every joint, but only those of the LAST
    joint are returned; earlier results are overwritten.

    :param dataset: object from class CowsDataset
    :return: flat-cluster labels of the last joint, or ``None`` when the
             dataset has no joints (previously this case raised
             ``UnboundLocalError``)
    """
    list_of_joints = CowsDataset.get_list_of_joints(dataset)
    labels_cosine = None  # stays None when there is nothing to cluster
    for joint_index, _ in enumerate(list_of_joints):
        dissimilarity = Hierarchical_clustering.cosine_correlation(
            list_of_joints, joint_index)
        # linkage() works on the condensed (vector) form of a distance
        # matrix; presumably the helper returns the square form -- confirm.
        condensed = ssd.squareform(dissimilarity, checks=False)
        hierarchy = linkage(condensed, method='average')
        labels_cosine = fcluster(hierarchy, t=2, criterion='maxclust')
        print('labelsCosine : ', labels_cosine)
    return labels_cosine
def plot_corr_trees(dataset):
    """
    Draw one dendrogram per joint from the correlation dissimilarity between
    cows.

    For every joint a new figure is created, the matrix returned by
    ``Hierarchical_clustering.dissimilarity_matrix`` is passed through
    ``squareform`` and clustered with average linkage, and the resulting tree
    is drawn with the cow names as leaf labels. The figures are neither shown
    nor saved here.

    Side effect: matplotlib's ``figure.max_open_warning`` rc parameter is set
    to 0 and stays that way after the call.

    :param dataset: object from class CowsDataset (its ``sheet_names`` are
                    used to build the leaf labels)
    :return: None
    """
    # One figure is opened per joint. The "too many open figures" warning is
    # emitted when a figure is created, so it has to be switched off BEFORE
    # the loop for the setting to have any effect on these figures.
    plt.rcParams.update({'figure.max_open_warning': 0})

    list_of_joints = CowsDataset.get_list_of_joints(dataset)
    # Leaf labels and joint names do not depend on the joint being plotted,
    # so they are built once instead of once per figure.
    cow_names = [
        CowsDataset.get_cow_name(sheet) for sheet in dataset.sheet_names
    ]
    joint_names = CowsDataset.get_joint_names(dataset)

    for joint_index, _ in enumerate(list_of_joints):
        dissimilarity = Hierarchical_clustering.dissimilarity_matrix(
            list_of_joints, joint_index)
        # linkage() works on the condensed (vector) form of a distance
        # matrix; presumably the helper returns the square form -- confirm.
        condensed = ssd.squareform(dissimilarity, checks=False)
        hierarchy = linkage(condensed, method='average')

        _, ax = plt.subplots(1, 1)
        dendrogram(hierarchy,
                   ax=ax,
                   labels=cow_names,
                   leaf_rotation=90,
                   leaf_font_size=10)
        ax.set_title('Dendrogram Correlation' + joint_names[joint_index])

    # NOTE: a disabled experiment (averaged "super" dissimilarity over all
    # joints, side-by-side cosine/correlation dendrograms and a KMeans
    # comparison) used to live here as commented-out code; recover it from
    # version control if it is needed again.
def plot_subplots_joint(dataset, side_dir='side2'):
    """
    Plot every joint as a 3x3 grid of subplots, one panel per cow, and save
    each grid as a PNG.

    For each joint name a figure is created whose title is the joint name
    followed by the last five characters of the first sheet name. Each panel
    shows the outlier-filtered joint signal of one sheet with a common y
    range, and the step starts are marked via ``Plot_dataset.plot_step_start``
    ('Front_Step' in red, 'Back_Step' in green). The figure is saved as
    ``<joint>_motionvis_.png`` under ``Plot/joints/<side_dir>`` next to this
    file and then shown.

    Parameters
    ----------
    dataset : CowsDataset
        Dataset to plot; at most the first nine sheets are drawn.
    side_dir : str, optional
        Name of the sub-folder of ``Plot/joints`` the images are written to.
        Defaults to ``'side2'``, the folder that used to be hard-coded; pass
        ``'side1'`` when plotting the other side.

    Returns
    -------
    None.
    """
    joint_names = CowsDataset.get_joint_names(dataset)
    sheet_names = dataset.sheet_names
    output_dir = os.path.join(os.path.dirname(__file__), 'Plot', 'joints',
                              side_dir)

    for column in joint_names:
        fig, axes = plt.subplots(3, 3, figsize=(15, 8), sharey='col')
        side = sheet_names[0][-5:]
        fig.suptitle(column + ' ' + side, fontsize=20)

        # Common y range for all panels of this joint. Named y_min / y_max so
        # the builtins min() and max() are not shadowed.
        y_min = Plot_dataset.get_min_scale_value(dataset, column)
        y_max = Plot_dataset.get_max_scale_value(dataset, column)

        # Never index past the available sheets: with fewer than nine cows
        # the remaining panels are simply left empty.
        n_panels = min(axes.size, len(sheet_names))
        for j in range(n_panels):
            cow_name = CowsDataset.get_cow_name(sheet_names[j])
            plt.subplot(3, 3, j + 1)
            plt.plot(dataset.remove_outliers(j, column, number=0.05,
                                             order=10))
            plt.ylim(y_min, y_max)
            plt.title("\n\n" + cow_name)
            plt.subplots_adjust(wspace=3)
            Plot_dataset.plot_step_start(dataset[j], 'Front_Step', 'red', 7)
            Plot_dataset.plot_step_start(dataset[j], 'Back_Step', 'green', 7)

        fig.tight_layout()
        image_name = column + '_motionvis_' + '.png'
        plt.gcf().savefig(os.path.join(output_dir, image_name))
        plt.show()
def generate_samples(dataset,
                     sheet_index,
                     column_name,
                     noise_fraction=0.02,
                     kernel_size=9):
    """
    Generate a new sample of one column (joint) by adding to every original
    value a random offset within +/- ``noise_fraction`` of that value, then
    smoothing the result with a median filter.

    The column is first cleaned with ``CowsDataset.remove_outliers`` and
    rounded to 3 decimals. NaN entries are copied through unchanged.

    The offset is drawn with ``random.uniform``. The earlier implementation
    truncated the bounds to integers and used ``random.randint``, so values
    with magnitude below 50 received no noise at all and larger values only
    whole-number offsets, which did not match the documented +/-2 % jitter.

    :param dataset: Object of class CowsDataset
    :param sheet_index: the index (beginning from 0) of the sheet of the excel file
    :param column_name: String The name of the joint
    :param noise_fraction: half-width of the jitter relative to each value
                           (default 0.02, i.e. +/-2 %)
    :param kernel_size: window size passed to ``medfilt`` (default 9)
    :return: new synthetic data based on the real data, as returned by
             ``medfilt``
    """
    joint_column = CowsDataset.remove_outliers(dataset,
                                               sheet_index,
                                               column_name,
                                               number=0.05,
                                               order=10)
    joint_column = joint_column.round(3)

    generated_data = []
    for value in joint_column:
        if np.isnan(value):
            # Keep gaps where they are in the original recording.
            generated_data.append(value)
            continue
        bound = noise_fraction * abs(value)
        # Round again so the jittered value keeps the column's 3-decimal
        # precision.
        generated_data.append(
            round(value + random.uniform(-bound, bound), 3))

    return medfilt(generated_data, kernel_size=kernel_size)
def plot_all_markers_subplots(df, sheet):
    """
    Plot the marker columns of one cow on a 9x4 grid and mark the start of
    every step.

    The first grid row holds the column headers 'X', 'Y', 'Z' and 'R'; the
    columns at positions 2..33 of ``df`` fill the following cells in order.
    The figure is saved as ``<cow>_motionvis_<side>.png`` next to this file
    and then shown.

    :param df: Data frame
    :param sheet: Excel sheet name; its last five characters are used as the
                  side label
    """
    grid_cols = 4
    grid_rows = 8 + 1  # eight rows of plots plus the header row

    marker_columns = df.iloc[0, 2:34].index
    fig, axes = plt.subplots(9, 4, figsize=(50, 35))

    cow_name = CowsDataset.get_cow_name(sheet)
    side = sheet[-5:]
    fig.suptitle(' '.join((cow_name, side)), fontsize=60)

    # Horizontal rule across the figure (drawn between the 'Front leg' and
    # 'Back leg' side labels added below).
    fig.add_artist(
        plt.Line2D((0, 1), (0.45, 0.45), color="k", linewidth=7))

    # Header row: one text-only cell per coordinate.
    for cell, header in enumerate(['X', 'Y', 'Z', 'R'], start=1):
        plt.subplot(grid_rows, grid_cols, cell)
        plt.text(0.5,
                 0.2,
                 horizontalalignment='center',
                 verticalalignment='center',
                 s=header,
                 fontsize=50)
        plt.axis('off')

    # Data cells start right after the four header cells.
    for cell, column in enumerate(marker_columns, start=5):
        plt.subplot(grid_rows, grid_cols, cell)
        plt.plot(df[column])
        Plot_dataset.plot_step_start(df, 'Front_Step', 'red', 15)
        Plot_dataset.plot_step_start(df, 'Back_Step', 'green', 15)
        plt.xlabel(' ', fontsize=30)
        plt.ylabel('\n\n' + column, fontsize=30)

    fig.text(0, 0.65, s='Front leg', rotation='vertical', fontsize=50)
    fig.text(0, 0.15, s='Back leg', rotation='vertical', fontsize=50)

    image_name = cow_name + '_motionvis_' + side + '.png'
    target = os.path.join(os.path.dirname(__file__), image_name)
    plt.gcf().savefig(target)
    plt.show()
from Welfare_AI_dataset import CowsDataset import torch import torchvision from torch.utils.data import Dataset, DataLoader import numpy as np import os import pandas as pd import math DICTIONARY_PATH = os.path.join(os.path.dirname(__file__), 'Dictionary_Kinematics.xlsx') DATASET_PATH = os.path.join(os.path.dirname(__file__), 'ScaledCoordinates_Post-Trial.xlsx') #information about the mother dataset( you can change the Side to 'side1' or 'side2' ) #in order to determine the size of the sliding window dictionary = pd.read_excel(DICTIONARY_PATH, "Video File -> Excel Tab Names") dictionary = dictionary.to_numpy() names = CowsDataset.get_side_sheets('side2') real_dataset = CowsDataset(names) window_size = CowsDataset.sliding_window(real_dataset, 'side2') #normalize the data def normalize(x): x_normed = x / x.max(0, keepdim=True)[0] return x_normed class YKDataset(Dataset): def __init__(self, transform=normalize): #data loading dic_path = os.path.join(os.path.dirname(__file__), 'Generated data', 'labels_dic_' + 'side2' + '.csv') pathLabels = np.loadtxt(dic_path, delimiter=',', dtype=str, skiprows=1)#load dictionary with file paths and labels self.file_paths = pathLabels[:, 1]
import pandas as pd import os from Data_generation import GeneratedData from tqdm import tqdm import numpy as np from Clustering_dataset import Hierarchical_clustering DICTIONARY_PATH = os.path.join(os.path.dirname(__file__), 'Dictionary_Kinematics.xlsx') side = 'side2' #or 'side2' n_samples = 5 #number of generated cows n_cows = 9 dictionary = pd.read_excel(DICTIONARY_PATH, "Video File -> Excel Tab Names") dictionary = dictionary.to_numpy() names = CowsDataset.get_side_sheets(side) dataset = CowsDataset(names) columns = CowsDataset.get_joint_names(dataset) generated_dataset = [] new_dataset = [] #list of the new generated dataframes of the generated cows new_sheets = [] #list of the names of the generated cows new_targets = [] #list of the labels of the generated cows paths = [] # list of the paths of csv generated cow files #generate n_samples of every cow for i, sheet in enumerate(tqdm(dataset)): new_dataset.append( GeneratedData.create_generated_cow_data(dataset, i, n_samples)[0]) new_sheets.append( GeneratedData.create_generated_cow_data(dataset, i, n_samples)[1]) new_targets.append(