def _Map_Dict_Generate(self):
    self.map_Dict = {'Phone': {}, 'Feature': {}}

    identifiers = ['ALL'] if hp_Dict['Hidden_Analysis']['Only_All'] else (self.identifier_List + ['ALL'])
    criteria = np.arange(
        start=hp_Dict['Hidden_Analysis']['Sensitive_Index']['Criteria'][0],
        stop=hp_Dict['Hidden_Analysis']['Sensitive_Index']['Criteria'][1] + hp_Dict['Hidden_Analysis']['Sensitive_Index']['Criteria'][2],
        step=hp_Dict['Hidden_Analysis']['Sensitive_Index']['Criteria'][2],
        )

    progress_Index = 0
    for identifier in identifiers:
        averages = np.stack(
            [
                np.mean(
                    self.activation_Dict['Phone'][phone, identifier][hp_Dict['Hidden_Analysis']['Sensitive_Index']['Step_Range'][0]:hp_Dict['Hidden_Analysis']['Sensitive_Index']['Step_Range'][1]],
                    axis=(0, 1)
                    )   # [File_Nums, Time, Dim] -> [Dim]
                for phone in self.phone_List
                ],
            axis=1
            )   # [Dim, Phone_Nums]
        for criterion in criteria:
            self.map_Dict['Phone'][identifier, criterion] = np.stack(
                [
                    self._Map_Calc(x, criterion)    # [Phone_Nums]
                    for x in averages
                    ],
                axis=1
                )   # [Phone_Nums, Dim]
            progress_Index += 1
            progress(progress_Index, len(identifiers) * len(criteria) * 2, status='Map generating...')

    for identifier in identifiers:
        averages = np.stack(
            [
                np.mean(self.activation_Dict['Feature'][feature, identifier], axis=(0, 1))  # [File_Nums, Time, Dim] -> [Dim]
                for feature in self.feature_List
                ],
            axis=1
            )   # [Dim, Feature_Nums]
        for criterion in criteria:
            self.map_Dict['Feature'][identifier, criterion] = np.stack(
                [
                    self._Map_Calc(x, criterion)    # [Feature_Nums]
                    for x in averages
                    ],
                axis=1
                )   # [Feature_Nums, Dim]
            progress_Index += 1
            progress(progress_Index, len(identifiers) * len(criteria) * 2, status='Map generating...')
    print()
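The methods in this section report progress through a `progress(count, total, status='')` call whose definition is not shown here. A minimal console progress-bar stand-in with the same call signature might look like the sketch below; it is an assumption, not the original utility.

import sys

def progress(count, total, status=''):
    # Draw a simple in-place console progress bar.
    bar_length = 40
    filled = int(round(bar_length * count / float(total)))
    percent = round(100.0 * count / float(total), 1)
    bar = '=' * filled + '-' * (bar_length - filled)
    sys.stdout.write('[{}] {}% ...{}\r'.format(bar, percent, status))
    sys.stdout.flush()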
def _Flow_Dict_Generate(self):
    self.flow_Dict = {'Phone': {}, 'Feature': {}}

    identifiers = ['ALL'] if hp_Dict['Hidden_Analysis']['Only_All'] else (self.identifier_List + ['ALL'])

    for index, identifier in enumerate(identifiers):
        self.flow_Dict['Phone'][identifier] = np.stack(
            [
                np.transpose(
                    np.mean(self.activation_Dict['Phone'][phone, identifier], axis=0)
                    )   # [File_Nums, Time, Dim] -> [Time, Dim] -> [Dim, Time]
                for phone in self.phone_List
                ],
            axis=1
            )   # [Dim, Phone_Nums, Time]
        self.flow_Dict['Feature'][identifier] = np.stack(
            [
                np.transpose(
                    np.mean(self.activation_Dict['Feature'][feature, identifier], axis=0)
                    )   # [File_Nums, Time, Dim] -> [Time, Dim] -> [Dim, Time]
                for feature in self.feature_List
                ],
            axis=1
            )   # [Dim, Feature_Nums, Time]
        progress(index + 1, len(identifiers), status='Flow generating...')
    print()
def Activation_Dict_Generate(self):
    pattern_List = self.model.feeder.Get_Test_Pattern_from_Wav(wav_Path_List=self.wav_Path_List)

    hiddens = []
    for index, pattern_Dict in enumerate(pattern_List):
        hiddens.append(self.model.Hidden_Step(**pattern_Dict))
        progress(index + 1, len(pattern_List), status='Activation dict generating...')
    print()
    hiddens = np.vstack(hiddens)    # [File_Nums, Time, Dim]

    self.activation_Dict = {
        'Phone': {  # Gathering by phoneme
            (phone, identifier): hiddens[[self.wav_Path_Index_Dict[path] for path in path_List]]    # [File_Nums, Time, Dim]
            for (phone, identifier), path_List in self.phone_File_List_Dict.items()
            },
        'Feature': {    # Gathering by feature
            (feature, identifier): hiddens[[self.wav_Path_Index_Dict[path] for path in path_List]]  # [File_Nums, Time, Dim]
            for (feature, identifier), path_List in self.feature_File_List_Dict.items()
            }
        }
def Test(self, epoch):
    infos, pattern_List = self.feeder.Get_Test_Pattern()

    logits = []
    for batch_Index, patterns in enumerate(pattern_List):
        logits.append(self.Test_Step(**patterns).numpy())
        progress(batch_Index + 1, len(pattern_List), status='Testing')
    print()
    logits = np.vstack(logits)

    export_Thread = Thread(
        target=self.Export_Test,
        args=(infos, logits, epoch)
        )   # Spawn a new thread to save the results.
    export_Thread.daemon = True
    export_Thread.start()

    return export_Thread
def body(step, samples):
    current_Local_Condition = tf.expand_dims(local_Conditions[:, step, :], axis=1)
    # global_Condition is always the same, so there is no per-step slicing.

    x = self.layer_Dict['First'](
        inputs=tf.expand_dims(samples[:, -1], axis=1),
        training=tf.convert_to_tensor(False)
        )
    skips = 0
    for block_Index in range(hp_Dict['WaveNet']['ResConvGLU']['Blocks']):
        for stack_Index in range(hp_Dict['WaveNet']['ResConvGLU']['Stacks_in_Block']):
            x, new_Skips = self.layer_Dict['ResConvGLU_{}_{}'.format(block_Index, stack_Index)](
                inputs=[x, current_Local_Condition, global_Conditions]
                )
            skips += new_Skips
    skips *= np.sqrt(1.0 / (hp_Dict['WaveNet']['ResConvGLU']['Blocks'] * hp_Dict['WaveNet']['ResConvGLU']['Stacks_in_Block']))

    logit = self.layer_Dict['Last'](skips)
    samples = tf.concat([samples, Sample_from_Discretized_Mix_Logistic(logit)], axis=-1)

    try:
        progress(
            step + 1,
            local_Conditions.shape[1],
            status='({}/{})'.format(step + 1, local_Conditions.get_shape()[1])
            )   # Fails on the initial (tracing) call, so it is ignored.
    except:
        pass

    return step + 1, samples
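The `body` function above generates one sample per call and appends it to `samples`, so it is meant to be iterated by an autoregressive loop. A minimal sketch of how such a body is typically driven with `tf.while_loop` follows; the function name and the shape invariant used here are assumptions, not the original inference code.

import tensorflow as tf

def run_autoregressive_loop(body, initial_samples, total_steps):
    # `body` takes (step, samples) and returns (step + 1, samples with one extra column),
    # so the shape invariant lets `samples` grow along the time axis.
    step = tf.constant(0)
    _, samples = tf.while_loop(
        cond=lambda step, samples: step < total_steps,
        body=body,
        loop_vars=[step, initial_samples],
        shape_invariants=[step.get_shape(), tf.TensorShape([None, None])]
        )
    return samples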
def Get_Test_Pattern(self):
    pattern_Info_List = []
    pattern_Info_List.extend([
        (word, identifier, 'Training')
        for word, identifier in self.pattern_Path_Dict['Training'].keys()
        ])
    pattern_Info_List.extend([
        (word, identifier, 'Pattern_Excluded')
        for word, identifier in self.pattern_Path_Dict['Pattern_Excluded'].keys()
        ])
    pattern_Info_List.extend([
        (word, identifier, 'Identifier_Excluded')
        for word, identifier in self.pattern_Path_Dict['Identifier_Excluded'].keys()
        ])
    pattern_Info_List.extend([
        (word, identifier, 'Test_Only')
        for word, identifier in self.pattern_Path_Dict['Test_Only'].keys()
        ])

    pattern_Path_List = []
    pattern_Path_List.extend([path for path in self.pattern_Path_Dict['Training'].values()])
    pattern_Path_List.extend([path for path in self.pattern_Path_Dict['Pattern_Excluded'].values()])
    pattern_Path_List.extend([path for path in self.pattern_Path_Dict['Identifier_Excluded'].values()])
    pattern_Path_List.extend([path for path in self.pattern_Path_Dict['Test_Only'].values()])

    patterns = {}
    for index, path in enumerate(pattern_Path_List):
        with open(os.path.join(hp_Dict['Pattern']['Pattern_Path'], path).replace("\\", "/"), "rb") as f:
            patterns[path] = pickle.load(f)
        progress(index + 1, len(pattern_Path_List), status='Test pattern loading')
    print()

    max_Step = max([pattern['Acoustic'].shape[0] for pattern in patterns.values()])

    pattern_Batch_List = [  # Split the pattern list to generate batches.
        pattern_Path_List[x:x + hp_Dict['Train']['Batch_Size']]
        for x in range(0, len(pattern_Path_List), hp_Dict['Train']['Batch_Size'])
        ]

    test_Pattern_List = []
    for pattern_Batch in pattern_Batch_List:
        acoustics = []
        # semantics = []
        # acoustic_Steps = []
        for path in pattern_Batch:
            pattern_Dict = patterns[path]
            acoustics.append(pattern_Dict['Acoustic'])
            # semantics.append(pattern_Dict['Semantic'])
            # acoustic_Steps.append(pattern_Dict['Acoustic'].shape[0])
        acoustics = self.Force_Pattern_Stack(acoustics, max_Step=max_Step).astype(np.float32)
        # semantics = np.stack(semantics, axis=0).astype(np.float32)
        # acoustic_Steps = np.stack(acoustic_Steps, axis=0).astype(np.int32)
        test_Pattern_List.append({
            'acoustics': acoustics,
            # 'acoustic_Steps': acoustic_Steps,
            # 'semantics': semantics,
            })

    return pattern_Info_List, test_Pattern_List
def Export_Inference(
        self,
        letter_String_List,
        pronunciation_List,
        hiddens,
        outputs,
        word_Label_Indices,
        phoneme_Label_Indices,
        added_Word_Labels,
        epoch,
        trained_Pattern_Index_Dict,
        export_Raw=False,
        file_Tag='Inference'
        ):
    # Count how many times each pattern has been trained.
    trained_Pattern_Count_Dict = {index: 0 for index in self.feeder.word_Index_Dict.values()}
    for index_List in trained_Pattern_Index_Dict.values():
        for index in index_List:
            trained_Pattern_Count_Dict[index] += 1

    if export_Raw:  # Save the pickled raw data.
        export_Dict = {
            'Epoch': epoch,
            'Pattern_Pair': list(zip(letter_String_List, pronunciation_List)),
            'Hidden': hiddens,
            'Output': outputs,
            'Trained_Pattern_Count_Dict': trained_Pattern_Count_Dict
            }
        with open(
                os.path.join(
                    self.export_Path,
                    'Inference',
                    '{}{}.Raw.pickle'.format('E_{}.'.format(epoch) if epoch is not None else '', file_Tag)
                    ),
                'wb'
                ) as f:
            pickle.dump(export_Dict, f, protocol=4)

    pattern_Index_List = list(range(hiddens.shape[0]))  # Result indices
    pattern_Index_Batch_List = [    # Analysis uses a lot of memory, so the patterns are split into batches.
        pattern_Index_List[x:x + hp_Dict['Analyzer']['Batch_Size']]
        for x in range(0, len(pattern_Index_List), hp_Dict['Analyzer']['Batch_Size'])
        ]

    hidden_Dict = {}    # Storage for hidden analysis results.
    result_Dict = {}    # Storage for output analysis results.
    for index, pattern_Index_Batch in enumerate(pattern_Index_Batch_List):
        batch_Hidden_Dict = self.hidden_Analyzer(inputs=hiddens[pattern_Index_Batch])   # Hidden analysis runs here. See 'Analyzer.py' for details.
        for key, value in batch_Hidden_Dict.items():
            if not key in hidden_Dict.keys():
                hidden_Dict[key] = []
            hidden_Dict[key].append(value.numpy())

        batch_Result_Dict = self.output_Analyzer(
            inputs=outputs[pattern_Index_Batch],
            word_label_indices=word_Label_Indices[pattern_Index_Batch],
            phoneme_label_indices=phoneme_Label_Indices[pattern_Index_Batch],
            added_Word_Labels=added_Word_Labels
            )   # Output analysis runs here. See 'Analyzer.py' for details.
        for key, value in batch_Result_Dict.items():
            if not key in result_Dict.keys():
                result_Dict[key] = []
            result_Dict[key].append(value.numpy())

        progress(index + 1, len(pattern_Index_Batch_List), status='Inference analyzer running')
    print()

    hidden_Dict = {
        key: np.hstack(value_List) if len(value_List[0].shape) == 1 else np.vstack(value_List)
        for key, value_List in hidden_Dict.items()
        }   # Stack the batched hidden results.
    result_Dict = {
        key: np.hstack(value_List) if len(value_List[0].shape) == 1 else np.vstack(value_List)
        for key, value_List in result_Dict.items()
        }   # Stack the batched output results.

    index_Phoneme_Dict = {
        index: phoneme
        for phoneme, index in self.feeder.phoneme_Index_Dict.items()
        if type(phoneme) == str
        }   # Results are index vectors; build an index-to-phoneme mapping to convert them to phoneme strings.
    # Result text file generating
    column_Title_List = [
        'Epoch', 'Ortho', 'Phono', 'Length', 'Probability', 'MeanRT', 'Trained_Count',
        'Cosine_Similarity', 'Mean_Squared_Error', 'Euclidean_Distance', 'Cross_Entropy',
        'Exported_Pronunciation',
        'Accuracy_Max_CS', 'Accuracy_Min_MSE', 'Accuracy_Min_ED', 'Accuracy_Min_CE', 'Accuracy_Pronunciation',
        'Hidden_Cosine_Similarity', 'Hidden_Mean_Squared_Error', 'Hidden_Euclidean_Distance', 'Hidden_Cross_Entropy',
        ]
    export_List = ['\t'.join(column_Title_List)]

    for index, (
            letter_String, target_Pronunciation,
            rt_CS, rt_MSE, rt_ED, rt_CE,
            exported_Pronunciation,
            acc_CS, acc_MSE, acc_ED, acc_CE, acc_Pronunciation,
            hidden_CS, hidden_MSE, hidden_ED, hidden_CE
            ) in enumerate(zip(
                letter_String_List,
                pronunciation_List,
                result_Dict['RT', 'CS'],
                result_Dict['RT', 'MSE'],
                result_Dict['RT', 'ED'],
                result_Dict['RT', 'CE'],
                result_Dict['Export', 'Pronunciation'],
                result_Dict['ACC', 'Max_CS'],
                result_Dict['ACC', 'Min_MSE'],
                result_Dict['ACC', 'Min_ED'],
                result_Dict['ACC', 'Min_CE'],
                result_Dict['ACC', 'Pronunciation'],
                hidden_Dict['CS'],
                hidden_Dict['MSE'],
                hidden_Dict['ED'],
                hidden_Dict['CE'],
                )):
        is_Word = letter_String in self.feeder.word_Index_Dict.keys()
        new_Line_List = [
            str(epoch or 'None'),
            letter_String,
            target_Pronunciation,
            str(len(letter_String)),
            str(self.feeder.frequency_Dict[letter_String]) if is_Word else 'None',
            str(self.feeder.human_RT_Dict[letter_String]) if is_Word else 'None',
            str(trained_Pattern_Count_Dict[index]) if is_Word else 'None',
            str(rt_CS),
            str(rt_MSE),
            str(rt_ED),
            str(rt_CE),
            ''.join([index_Phoneme_Dict[phoneme_Index] for phoneme_Index in exported_Pronunciation]),
            str(acc_CS),
            str(acc_MSE),
            str(acc_ED),
            str(acc_CE),
            str(acc_Pronunciation),
            str(hidden_CS),
            str(hidden_MSE),
            str(hidden_ED),
            str(hidden_CE),
            ]
        export_List.append('\t'.join(new_Line_List))

    with open(
            os.path.join(
                self.export_Path,
                'Inference',
                '{}{}.Summary.txt'.format('E_{}.'.format(epoch) if epoch is not None else '', file_Tag)
                ),
            'w'
            ) as f:
        f.write('\n'.join(export_List))
def RSA_Generate(self, permutation_Nums=100000):
    os.makedirs(self.export_Path, exist_ok=True)

    rsa_Dict = {}
    permutation_Cor_List_Dict = {}

    rsa_Dict['EARShot', 'Mesgarani'] = {
        metric_Type: self.RSA_Calc(self.data_Array, self.mestarani_Distance_Dict[metric_Type], metric_Type, False)
        for metric_Type in self.mertic_Type_List
        }
    permutation_Cor_List_Dict['EARShot', 'Mesgarani'] = {}
    for metric_Type in self.mertic_Type_List:
        permutation_Cor_List_Dict['EARShot', 'Mesgarani'][metric_Type] = []
        for index in range(permutation_Nums):
            permutation_Cor_List_Dict['EARShot', 'Mesgarani'][metric_Type].append(
                self.RSA_Calc(self.data_Array, self.mestarani_Distance_Dict[metric_Type], metric_Type, True)[1]
                )
            progress(index + 1, permutation_Nums, status='{} EARShot-Mesgarani RSA based on {}'.format(self.data_Type, metric_Type))
        print()

    if self.data_Type == 'PSI':
        phoneme_Feature_Distance_Dict = {
            metric_Type: pairwise_distances(self.phoneme_Feature_Array.astype(np.float64), metric=metric_Type)
            for metric_Type in self.mertic_Type_List
            }

        rsa_Dict['EARShot', 'Phoneme_Feature'] = {
            metric_Type: self.RSA_Calc(self.data_Array, phoneme_Feature_Distance_Dict[metric_Type], metric_Type, False)
            for metric_Type in self.mertic_Type_List
            }
        permutation_Cor_List_Dict['EARShot', 'Phoneme_Feature'] = {}
        for metric_Type in self.mertic_Type_List:
            permutation_Cor_List_Dict['EARShot', 'Phoneme_Feature'][metric_Type] = []
            for index in range(permutation_Nums):
                permutation_Cor_List_Dict['EARShot', 'Phoneme_Feature'][metric_Type].append(
                    self.RSA_Calc(self.data_Array, phoneme_Feature_Distance_Dict[metric_Type], metric_Type, True)[1]
                    )
                progress(index + 1, permutation_Nums, status='{} EARShot-P&F RSA based on {}'.format(self.data_Type, metric_Type))
            print()

        rsa_Dict['Phoneme_Feature', 'Mesgarani'] = {
            metric_Type: self.RSA_Calc(self.phoneme_Feature_Array, self.mestarani_Distance_Dict[metric_Type], metric_Type, False)
            for metric_Type in self.mertic_Type_List
            }
        permutation_Cor_List_Dict['Phoneme_Feature', 'Mesgarani'] = {}
        for metric_Type in self.mertic_Type_List:
            permutation_Cor_List_Dict['Phoneme_Feature', 'Mesgarani'][metric_Type] = []
            for index in range(permutation_Nums):
                permutation_Cor_List_Dict['Phoneme_Feature', 'Mesgarani'][metric_Type].append(
                    self.RSA_Calc(self.phoneme_Feature_Array, self.mestarani_Distance_Dict[metric_Type], metric_Type, True)[1]
                    )
                progress(index + 1, permutation_Nums, status='{} P&F-Mesgarani RSA based on {}'.format(self.data_Type, metric_Type))
            print()

    for data_Label, base_Label in [('EARShot', 'Mesgarani')] + ([('EARShot', 'Phoneme_Feature'), ('Phoneme_Feature', 'Mesgarani')] if self.data_Type == 'PSI' else []):
        for metric_Type in self.mertic_Type_List:
            # Permutation test p-value: the fraction of permutation correlations
            # that are at least as large as the actual correlation.
            p_Value = 1 - np.sum(np.less(
                permutation_Cor_List_Dict[data_Label, base_Label][metric_Type],
                rsa_Dict[data_Label, base_Label][metric_Type][1]
                )) / len(permutation_Cor_List_Dict[data_Label, base_Label][metric_Type])
            fig = self.Plot_RDM(
                dm=rsa_Dict[data_Label, base_Label][metric_Type][0],
                label_List=sort_List_Dict[self.data_Type],
                metric=metric_Type,
                fig_title='{0} {1} DSM: \n {2} cor: {3:.03f} \n Permutation cor: {4:.03f} \n Permutation test: p = {5:.03f}'.format(
                    data_Label,
                    self.data_Type,
                    base_Label,
                    rsa_Dict[data_Label, base_Label][metric_Type][1],
                    np.mean(permutation_Cor_List_Dict[data_Label, base_Label][metric_Type]),
                    p_Value
                    )
                )
            fig.savefig(
                os.path.join(self.export_Path, 'RSA.{}_to_{}.{}.{}.png'.format(data_Label, base_Label, self.data_Type, metric_Type)),
                dpi=300
                )
            plt.close()

        extract_List = [','.join(self.mertic_Type_List)]
        extract_List.append(','.join([
            '{}'.format(rsa_Dict[data_Label, base_Label][metric_Type][1])
            for metric_Type in self.mertic_Type_List
            ]))
        with open(os.path.join(self.export_Path, 'RSA.{}_to_{}.{}.Actual.csv'.format(data_Label, base_Label, self.data_Type)), 'w') as f:
            f.write('\n'.join(extract_List))

        extract_List = [','.join(self.mertic_Type_List)]
        extract_List.extend([
            ','.join(['{}'.format(x) for x in permutation_List])
            for permutation_List in zip(*[
                permutation_Cor_List_Dict[data_Label, base_Label][metric_Type]
                for metric_Type in self.mertic_Type_List
                ])
            ])
        with open(os.path.join(self.export_Path, 'RSA.{}_to_{}.{}.Shuffle.csv'.format(data_Label, base_Label, self.data_Type)), 'w') as f:
            f.write('\n'.join(extract_List))
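`RSA_Calc` is defined elsewhere; judging from how it is called above, it returns a (distance matrix, correlation) pair and, when the last argument is True, permutes the data before comparing it with the reference distance matrix. A minimal sketch of that kind of comparison follows; the function name `rsa_compare`, the Spearman correlation, and the row/column permutation are assumptions, not the original implementation.

import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.stats import spearmanr

def rsa_compare(data_array, reference_dm, metric, shuffle=False):
    # Build a distance matrix from the data, optionally permute it, and
    # correlate its upper triangle with the reference distance matrix.
    dm = squareform(pdist(data_array.astype(np.float64), metric=metric))
    if shuffle:
        order = np.random.permutation(dm.shape[0])
        dm = dm[order][:, order]    # permute rows and columns together
    triu = np.triu_indices_from(dm, k=1)
    correlation, _ = spearmanr(dm[triu], reference_dm[triu])
    return dm, correlation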
def Analysis(self, batch_Steps=200):
    result_File_List = sorted([ # Result files sorting
        os.path.join(self.result_Path, 'Test', x).replace('\\', '/')
        for x in os.listdir(os.path.join(self.result_Path, 'Test').replace('\\', '/'))
        if x.endswith('.pickle') and x != 'Metadata.pickle'
        ])

    reaction_Times = [
        '\t'.join(['{}'.format(x) for x in [
            'Epoch', 'Word', 'Identifier', 'Pattern_Type', 'Pronunciation', 'Pronunciation_Length',
            'Uniqueness_Point', 'Cohort_N', 'Rhyme_N', 'Neighborhood_N',
            'Onset_Absolute_RT', 'Onset_Relative_RT', 'Onset_Time_Dependent_RT',
            'Offset_Absolute_RT', 'Offset_Relative_RT', 'Offset_Time_Dependent_RT'
            ]])
        ]
    category_Flows = [
        '\t'.join(['{}'.format(x) for x in [
            'Epoch', 'Word', 'Identifier', 'Pattern_Type', 'Pronunciation', 'Pronunciation_Length',
            'Uniqueness_Point', 'Cohort_N', 'Rhyme_N', 'Neighborhood_N',
            'Category', 'Category_Count', 'Accuracy'
            ] + list(range(self.max_Step))])
        ]

    for result_File in result_File_List:
        with open(result_File, 'rb') as f:
            result_Dict = pickle.load(f)
        epoch = result_Dict['Epoch']
        infos = result_Dict['Info']
        outputs = result_Dict['Result']     # [Batch, Steps, Dims]
        for index, (output, (word, identifier, pattern_Type)) in enumerate(zip(outputs, infos)):
            data = self.Data_Generate(output, word, identifier, batch_Steps)    # [Num_Words, Steps]
            rt_Dict = self.RT_Generate(word, identifier, data)
            category_Flow_Dict = self.Category_Flow_Generate(word, data)

            reaction_Times.append('\t'.join(['{}'.format(x) for x in [
                epoch,
                word,
                identifier,
                pattern_Type,
                '.'.join(self.pattern_Metadata_Dict['Pronunciation_Dict'][word]),
                len(self.pattern_Metadata_Dict['Pronunciation_Dict'][word]),
                self.adjusted_Length_Dict[word],
                len(self.category_Dict[word, 'Cohort']),
                len(self.category_Dict[word, 'Rhyme']),
                len(self.category_Dict[word, 'DAS_Neighborhood']),
                rt_Dict['Onset', 'Absolute'],
                rt_Dict['Onset', 'Relative'],
                rt_Dict['Onset', 'Time_Dependent'],
                rt_Dict['Offset', 'Absolute'],
                rt_Dict['Offset', 'Relative'],
                rt_Dict['Offset', 'Time_Dependent']
                ]]))

            for category in ["Target", "Cohort", "Rhyme", "Unrelated", "Other_Max"]:
                if category == "Other_Max":
                    category_Count = np.nan
                else:
                    category_Count = len(self.category_Dict[word, category])
                category_Flows.append('\t'.join(['{}'.format(x) for x in [
                    epoch,
                    word,
                    identifier,
                    pattern_Type,
                    '.'.join(self.pattern_Metadata_Dict['Pronunciation_Dict'][word]),
                    len(self.pattern_Metadata_Dict['Pronunciation_Dict'][word]),
                    self.adjusted_Length_Dict[word],
                    len(self.category_Dict[word, 'Cohort']),
                    len(self.category_Dict[word, 'Rhyme']),
                    len(self.category_Dict[word, 'DAS_Neighborhood']),
                    category,
                    category_Count,
                    not np.isnan(rt_Dict["Onset", "Time_Dependent"])
                    ] + [
                    '{:.5f}'.format(x) for x in category_Flow_Dict[category]
                    ]]))
            progress(index + 1, outputs.shape[0], status=result_File)
        print()

    with open(os.path.join(self.result_Path, 'Test', 'RTs.txt').replace('\\', '/'), 'w') as f:
        f.write('\n'.join(reaction_Times))
    with open(os.path.join(self.result_Path, 'Test', 'Category_Flows.txt').replace('\\', '/'), 'w') as f:
        f.write('\n'.join(category_Flows))
for j in range(n):
    X[:, j] = (X[:, j] - X[:, j].mean())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=11)

all_theta = np.zeros((k, n + 1))

# Training: one-vs-all
print("Training")
i = 0
for flor in Species:
    progress(i + 1, k)
    tmp_y = np.array(y_train == flor, dtype=int)
    optTheta = logisticRegression(X_train, tmp_y, np.zeros((n + 1, 1))).x
    all_theta[i] = optTheta
    i += 1

# Predictions
P = sigmoid(X_test.dot(all_theta.T))
p = [Species[np.argmax(P[i, :])] for i in range(X_test.shape[0])]
s = sum(np.array(p == y_test, dtype=int))

print("\n\n")
print("Test Accuracy ", (s / X_test.shape[0]) * 100, "%")
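The script relies on `sigmoid` and `logisticRegression` helpers defined earlier in the file (not shown). A minimal sketch of what they could look like is below, assuming `X` already carries the intercept column (so `theta` has `X.shape[1]` entries) and that the optimizer result exposes `.x`; this is an illustration under those assumptions, not the original helpers.

import numpy as np
from scipy.optimize import minimize

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def logisticRegression(X, y, initial_theta):
    # Minimize the logistic-regression cost over theta for the given data.
    y = np.asarray(y).reshape(-1)

    def cost(theta):
        h = sigmoid(X.dot(theta))
        eps = 1e-10     # avoid log(0)
        return -np.mean(y * np.log(h + eps) + (1 - y) * np.log(1 - h + eps))

    def grad(theta):
        h = sigmoid(X.dot(theta))
        return X.T.dot(h - y) / X.shape[0]

    # Returns a scipy OptimizeResult; the fitted parameters are in `.x`.
    return minimize(cost, np.asarray(initial_theta).reshape(-1), jac=grad, method='BFGS')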