def AggregateResults(GoldTEESXML, PredFileList, EpochFocus, AggeragationFolder, Description, ShouldWriteBackAggregation):
    """Evaluate several prediction files against gold and aggregate them by voting.

    Each file in ``PredFileList`` is scored against the gold file (F1 of the
    "positive" class). The per-interaction predictions of all files are then
    summed, and for every threshold ``t`` in ``1..len(PredFileList)`` an
    aggregated prediction (sum >= t -> 1, else 0) is evaluated. The threshold
    with the highest positive F1 is reported; ties go to the lowest threshold.

    Args:
        GoldTEESXML: path of the gold XML file; must exist.
        PredFileList: non-empty list of prediction file paths. Every path must
            contain ``"EpochNo<EpochFocus>.xml"`` and must exist.
        EpochFocus: epoch number used in the file-name check and in the names
            of written-back files.
        AggeragationFolder: output folder for written-back files. Only used
            (and created when missing) if ``ShouldWriteBackAggregation`` is true.
        Description: free text printed in the report header.
        ShouldWriteBackAggregation: when true, one XML file per threshold is
            written into ``AggeragationFolder``, re-evaluated with
            ``CompareWithGold`` and checked against the in-memory evaluation.

    Returns:
        ";"-joined string: mean F1, std F1, best threshold, then recall,
        precision and F1 at that threshold (numbers formatted by ``f_round``).

    Raises:
        ValueError: if ``PredFileList`` is empty.
        AssertionError: if a file is missing or misnamed, if a prediction file
            does not cover exactly the gold interactions, or if written-back
            results differ from the in-memory ones.
    """
    import copy as cp

    # With no prediction files the mean/std are NaN and there is no threshold
    # to select; fail early with a clear message instead of an IndexError later.
    if len(PredFileList) == 0:
        raise ValueError("PredFileList is empty: there is nothing to aggregate.")

    print("-" * 80)
    print(Description)
    print(GoldTEESXML)
    print("-" * 20)

    expected_name_part = "EpochNo" + str(EpochFocus) + ".xml"
    for fileaddr in PredFileList:
        assert expected_name_part in fileaddr, "Unexpected prediction file name: " + str(fileaddr)
        assert GE.FILE_CheckFileExists(fileaddr) == True, "Prediction file not found: " + str(fileaddr)
    assert GE.FILE_CheckFileExists(GoldTEESXML) == True, "Gold file not found: " + str(GoldTEESXML)

    if ShouldWriteBackAggregation:
        if not GE.OS_IsDirectory(AggeragationFolder):
            GE.OS_MakeDir(AggeragationFolder)
        # endswith (rather than [-1]) also copes with an empty folder string.
        if not AggeragationFolder.endswith("/"):
            AggeragationFolder += "/"

    # Process GOLD
    GoldTree = LoadTEESFile_GetTreeRoot(GoldTEESXML, True)
    GoldData = Get_Interaction_Results(GoldTree)
    GoldKeys = sorted(GoldData.keys())
    y_true = np.array([GoldData[interaction_key] for interaction_key in GoldKeys], dtype=np.int16)

    # Score every prediction file on its own and sum the predictions per
    # interaction (presumably 0/1 labels, so the sum is a vote count -- verify
    # against Get_Interaction_Results).
    PredMetric = []
    Agg_results = dict.fromkeys(GoldKeys, 0)
    for FileAddress in PredFileList:
        print("/".join(FileAddress.split("/")[-4:]))
        PredData = Get_Interaction_Results(LoadTEESFile_GetTreeRoot(FileAddress, True))
        assert sorted(PredData.keys()) == GoldKeys, "Interactions differ from gold in: " + str(FileAddress)
        file_predictions = [PredData[interaction_key] for interaction_key in GoldKeys]
        for interaction_key, prediction in zip(GoldKeys, file_predictions):
            Agg_results[interaction_key] += prediction
        y_pred = np.array(file_predictions, dtype=np.int16)
        PredMetric.append(Evaluate(y_true, y_pred)["positive"]["f1-score"])

    mean_f1 = f_round(np.mean(PredMetric))
    std_f1 = f_round(np.std(PredMetric))
    # Spacing reproduces the output of the former Python 2 print statement.
    print("-" * 20 + " \n" + str(len(PredFileList)) + " MEAN :  " + mean_f1 + " STD:  " + std_f1 + " \n")

    # Evaluate the aggregated prediction at every possible vote threshold.
    Agg_Thresholds_Evaluation = []
    for threshold_level in range(1, len(PredFileList) + 1):
        y_pred = np.array(
            [1 if Agg_results[interaction_key] >= threshold_level else 0 for interaction_key in GoldKeys],
            dtype=np.int16)
        positive_scores = Evaluate(y_true, y_pred)["positive"]
        row = (threshold_level, positive_scores["recall"], positive_scores["precision"], positive_scores["f1-score"])
        print(" ".join(['{:2d}'.format(threshold_level), f_round(row[1]), f_round(row[2]), f_round(row[3])]))
        Agg_Thresholds_Evaluation.append(row)

    # Stable sort: among equal F1 scores the lowest threshold comes first.
    Best_Threshold = sorted(Agg_Thresholds_Evaluation, key=lambda x: x[3], reverse=True)[0]
    summary_fields = [
        mean_f1,
        std_f1,
        '{:2d}'.format(Best_Threshold[0]),
        f_round(Best_Threshold[1]),
        f_round(Best_Threshold[2]),
        f_round(Best_Threshold[3]),
    ]
    print("\n" + "\t".join(summary_fields))
    FINAL_RES_STRING = ";".join(summary_fields)

    if ShouldWriteBackAggregation:
        # The name suffix only depends on the first prediction file.
        if PredFileList[0].split("/")[-1].startswith("LinComb"):
            name_suffix = "LinComb"
        else:
            name_suffix = "Forward"

        WRITTEN_Agg_Thresholds_Evaluation = []
        for threshold_level in range(1, len(PredFileList) + 1):
            # Relabel a copy of the gold tree with the aggregated decision.
            gold_tree = cp.deepcopy(GoldTree)
            for interaction_element in gold_tree.findall(".//interaction"):
                key = (interaction_element.attrib["id"],
                       interaction_element.attrib["e1"],
                       interaction_element.attrib["e2"])
                interaction_type = "Lives_In" if Agg_results[key] >= threshold_level else "neg"
                interaction_element.set('type', interaction_type)

            wb_filename = "Aggr_Epoch_" + str(EpochFocus) + "_Threshold_" + str(threshold_level) + "_" + name_suffix + ".xml"
            PredictedFileAddress = AggeragationFolder + wb_filename
            LOADSAVE_save_xml(gold_tree, PredictedFileAddress)

            positive_scores = CompareWithGold(GoldData, PredictedFileAddress)["positive"]
            row = (threshold_level, positive_scores["recall"], positive_scores["precision"], positive_scores["f1-score"])
            WRITTEN_Agg_Thresholds_Evaluation.append(row)
            print(" ".join([PredictedFileAddress, " ", '{:2d}'.format(threshold_level),
                            f_round(row[1]), f_round(row[2]), f_round(row[3])]))

        # The files on disk must score exactly like the in-memory aggregation.
        assert Agg_Thresholds_Evaluation == WRITTEN_Agg_Thresholds_Evaluation, \
            "Written-back aggregation differs from the in-memory evaluation."

    return FINAL_RES_STRING
def ConfigFileVerification(self):
    """Validate the loaded configuration and derive normalized settings.

    Reads ``self.Configs`` and calls ``self.PROGRAM_Halt`` with an explanatory
    message for every missing or malformed entry. Derived values (mostly
    lower-cased names and one-hot index tables) are stored in
    ``self.__DO_NOT_SET_ME``. A few entries of ``self.Configs`` are normalized
    in place: the ``ActionON_*`` options are upper-cased, a null
    ``MAX_SENTENCE_LENGTH`` becomes -1 and ``ExcludeClassLabelsList`` is
    lower-cased.

    NOTE(review): the checks are written as if ``self.PROGRAM_Halt`` never
    returns -- confirm against its definition.
    NOTE(review): ``self.__DO_NOT_SET_ME["ExampleGeneration"]`` is written to
    in section 8 but not created here; presumably it is initialized elsewhere.
    """
    # 1: Checking class types
    if "CLASSES" not in self.Configs:
        self.PROGRAM_Halt("Missing CLASSES dictionary in Config file.")
    LOCAL_CLASSES = self.Configs["CLASSES"]
    if "Negative" not in LOCAL_CLASSES:
        self.PROGRAM_Halt("Negative class should be defined in the config file.")
    if "Positive" not in LOCAL_CLASSES:
        self.PROGRAM_Halt("Positive class should be defined in the config file.")
    if len(LOCAL_CLASSES) != 2:
        self.PROGRAM_Halt('There should be only Positive and Negatives classes. For Multiclass use e.g., "Positive": ["class1", "class2"]')

    # Each side may be given as a single string or as a list of strings.
    N, P = [], []
    if isinstance(LOCAL_CLASSES["Negative"], basestring):
        N.append(LOCAL_CLASSES["Negative"])
    elif isinstance(LOCAL_CLASSES["Negative"], list):
        N.extend(LOCAL_CLASSES["Negative"])
        if len(N) != len(set(N)):
            self.PROGRAM_Halt("Negative class cannot have duplicate defined types.")

    if isinstance(LOCAL_CLASSES["Positive"], basestring):
        P.append(LOCAL_CLASSES["Positive"])
    elif isinstance(LOCAL_CLASSES["Positive"], list):
        P.extend(LOCAL_CLASSES["Positive"])
        if len(P) != len(set(P)):
            self.PROGRAM_Halt("Positive class cannot have duplicate defined types.")

    if len(set(N).intersection(set(P))) > 0:
        self.PROGRAM_Halt("Shared defined class type between Positive and Negative class.")

    # <<<CRITICAL>>> class names are LOWER-CASED here
    self.__DO_NOT_SET_ME["CLASSES"] = {"Negative": None, "Positive": None}
    self.__DO_NOT_SET_ME["CLASSES"]["Negative"] = set(i.lower() for i in N)
    self.__DO_NOT_SET_ME["CLASSES"]["Positive"] = set(i.lower() for i in P)

    # Labels used when writing prediction results back into XML files
    # (original case is kept here).
    self.__DO_NOT_SET_ME["WRITEBACK_CLASSES"] = {"Negative": None, "Positive": None}
    self.__DO_NOT_SET_ME["WRITEBACK_CLASSES"]["Negative"] = set(N)
    self.__DO_NOT_SET_ME["WRITEBACK_CLASSES"]["Positive"] = set(P)

    # Renaming relations into other relations ...
    # Good for lots of things, for example integrating many non-relevant
    # relations into a single group.
    if "RENAME_CLASSES" not in self.Configs:
        self.__DO_NOT_SET_ME["RENAME_CLASSES"] = {}
    else:
        if not isinstance(self.Configs["RENAME_CLASSES"], dict):
            self.PROGRAM_Halt("RENAME_CLASSES in the config file should be a dictionary or not given at all.")
        temp_rename_class = {}
        for key in self.Configs["RENAME_CLASSES"]:
            if not isinstance(key, basestring):
                self.PROGRAM_Halt("Problem in RENAME_CLASSES in the config file. Each keys in dictionary should be string. Problematic: " + str(key))
            rename_target = self.Configs["RENAME_CLASSES"][key]
            if isinstance(rename_target, basestring):
                x = rename_target.lower()
                if (x not in self.__DO_NOT_SET_ME["CLASSES"]["Negative"]) and (x not in self.__DO_NOT_SET_ME["CLASSES"]["Positive"]):
                    self.PROGRAM_Halt("Problem in RENAME_CLASSES in the config file. Value should belong to defined positive or negative classes. Problematic: " + x)
                temp_rename_class[key.lower()] = x
            elif isinstance(rename_target, list):
                allvalues = []
                for x in rename_target:
                    x = x.lower()
                    if (x not in self.__DO_NOT_SET_ME["CLASSES"]["Negative"]) and (x not in self.__DO_NOT_SET_ME["CLASSES"]["Positive"]):
                        self.PROGRAM_Halt("Problem in RENAME_CLASSES in the config file. Value should belong to defined positive or negative classes. Problematic: " + x)
                    allvalues.append(x)
                temp_rename_class[key.lower()] = allvalues
            else:
                self.PROGRAM_Halt("RENAME_CLASSES in the config file should be a dictionary, each value should be string or list of strings")
        self.__DO_NOT_SET_ME["RENAME_CLASSES"] = temp_rename_class
        # <<<CRITICAL>>>
        self.lp(["-" * 80, "[WARNING]: RENAMING RELATIONS INTO OTHER RELATIONS:", str(self.__DO_NOT_SET_ME["RENAME_CLASSES"]), "-" * 80])

    # - OneHotEncodingForMultiClass: is used for real classification and
    #   prediction and creating the softmax columns. If we have 10 classes in
    #   total, and define 3 classes (i.e., "a","b","c") as Negative in the
    #   config file, the other 7 will be regarded as positive, and will have
    #   8 columns in the softmax. Index 0 in the softmax will be always for
    #   negative label(S). This has the artificial name "negative".
    #
    #   1) Function: ProcessRoot in Preprocessing.py file:
    #      ----------------------------------------------------------
    #      if pair_type in self.Configs["CLASSES"]["Negative"]:
    #          positive=False; class_tp=None ;
    #      elif pair_type in self.Configs["CLASSES"]["Positive"]:
    #          positive=True; class_tp=pair_type ;
    #      else:
    #          self.PROGRAM_Halt ("Unknown class type for interaction:" + pair_type + "\n" + str(pair_attrib));
    #
    #   2) Function: Create_FeatureMatrix in NetworkInputGeneration.py file:
    #      ----------------------------------------------------------
    #      HowManyColumnsForOneHotEncoding = len (self.Configs["OneHotEncodingForMultiClass"]);
    #      y = np.zeros ((Total_Example_CNT, HowManyColumnsForOneHotEncoding),dtype=np.int16);
    #      ....
    #      if self.Configs["ClassificationType"]== "binary":
    #          y [seen_pair_count] = 1 if (pair["POSITIVE"]==True) else 0 ;
    #      else:
    #          if pair["POSITIVE"]==False:
    #              y[seen_pair_count,0]=1;  # index zero is always for negative class(ES)!!!
    #          else:
    #              OneHotIndex = self.Configs["OneHotEncodingForMultiClass"][pair["CLASS_TP"]];
    #              y[seen_pair_count, OneHotIndex] = 1 ;
    #
    #   3) Function __Evaluate in RelationExtractionPipeline.py:
    #      ----------------------------------------------------------
    #      Used for evaluation.
    #
    # - WRITEBACK_OneHotEncodingForMultiClass: Like above, but this is used for
    #   writing back the prediction results into XML file.
    #   <<<CRITICAL>>>: ALL negative predictions will be written with the
    #   negative class label of index 0 in list of "Negative" in the config
    #   file. So:
    #     "CLASSES" : { "Negative" : ["neg" , "d"], "Positive" : ["a","b","c"] }
    #         ---> ALL NEGATIVE PREDICTIONS WILL GET "neg" class label.
    #     "CLASSES" : { "Negative" : ["d" , "neg"], "Positive" : ["a","b","c"] }
    #         ---> ALL NEGATIVE PREDICTIONS WILL GET "d" class label.
    #
    # NOTE(review): N is empty (IndexError below) when "Negative" is neither a
    # string nor a non-empty list; no check above rejects that case.
    FIRSTneg = N[0]
    OneHotEncodingForMultiClass = {u"negative": 0}
    WRITEBACK_OneHotEncodingForMultiClass = {FIRSTneg: 0}
    for i, j in enumerate(sorted(P)):
        OneHotEncodingForMultiClass[j.lower()] = i + 1
        WRITEBACK_OneHotEncodingForMultiClass[j] = i + 1
    self.__DO_NOT_SET_ME["OneHotEncodingForMultiClass"] = OneHotEncodingForMultiClass
    self.__DO_NOT_SET_ME["WRITEBACK_OneHotEncodingForMultiClass"] = WRITEBACK_OneHotEncodingForMultiClass

    # Coarse class labels
    # Example: Cause_Effect(e1,e2) and Cause_Effect(e2,e1) ==> Cause_Effect
    # A set removes the duplicates; sorting makes the indices deterministic.
    Coarse_Classes = sorted(set(
        class_tp.split("(e1,e2)")[0].split("(e2,e1)")[0]
        for class_tp in self.__DO_NOT_SET_ME["CLASSES"]["Positive"]))
    OneHotEncodingForCoarseMultiClass = {u"negative": 0}
    for i, j in enumerate(Coarse_Classes):
        OneHotEncodingForCoarseMultiClass[j.lower()] = i + 1
    self.__DO_NOT_SET_ME["OneHotEncodingForCoarseMultiClass"] = OneHotEncodingForCoarseMultiClass

    # 2: ClassificationType
    if "ClassificationType" not in self.Configs:
        self.PROGRAM_Halt("Missing Section: ClassificationType (should be either 'Binary' or 'Multiclass'.")
    if self.Configs["ClassificationType"].lower() not in ['binary', 'multiclass']:
        self.PROGRAM_Halt("Missing Section: ClassificationType (should be either 'Binary' or 'Multiclass'. GIVEN:" + str(self.Configs["ClassificationType"]))
    # <<<CRITICAL>>> LOWER CLASSIFICATION TYPE
    self.__DO_NOT_SET_ME["ClassificationType"] = self.Configs["ClassificationType"].lower()

    # 3: ExampleGeneration
    if "ExampleGeneration" not in self.Configs:
        self.PROGRAM_Halt("Missing ExampleGeneration dictionary in the config file.")

    # Disabled SDP-related checks, kept for reference.
    """
    if not "HaltIfNoSDP" in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt ("Missing bool HaltIfNoSDP in ExampleGeneration dictionary in the config file.");
    if not isinstance (self.Configs["ExampleGeneration"]["HaltIfNoSDP"], bool):
        self.PROGRAM_Halt ("HaltIfNoSDP in the ExampleGeneration dictionary in the config file should be either true or false.");

    if not "SDP_DIRECTION" in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt ("Missing string SDP_DIRECTION in ExampleGeneration dictionary in the config file.");
    if not isinstance (self.Configs["ExampleGeneration"]["SDP_DIRECTION"], unicode):
        self.PROGRAM_Halt ("SDP_DIRECTION in ExampleGeneration dictionary in the config file should be string.");
    SDP_DIRECTION = self.Configs["ExampleGeneration"]["SDP_DIRECTION"].lower();
    if not SDP_DIRECTION in ["from_e1value_to_e2value" , "from_e2value_to_e1value" , "from_firstoccurring_to_second" , "from_secondoccurring_to_first"]:
        self.PROGRAM_Halt ("SDP_DIRECTION in ExampleGeneration dictionary in the config file should be one of 'from_e1value_to_e2value' , 'from_e2value_to_e1value' , 'from_firstoccurring_to_second' , 'from_secondoccurring_to_first'.");
    self.__DO_NOT_SET_ME["ExampleGeneration"]["SDP_DIRECTION"] = SDP_DIRECTION ;

    if not "Generate_Reversed_SDP_Features" in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt ("Missing boolean Generate_Reversed_SDP_Features in ExampleGeneration dictionary in the config file.");
    if not isinstance (self.Configs["ExampleGeneration"]["Generate_Reversed_SDP_Features"], bool):
        self.PROGRAM_Halt ("Generate_Reversed_SDP_Features in ExampleGeneration dictionary in the config file should be bool.");

    if not "SDP_MAXLEN_BECAREFUL" in self.Configs["ExampleGeneration"]:
        self.__DO_NOT_SET_ME["ExampleGeneration"]["SDP_MAXLEN_BECAREFUL"] = None ;
        self.lp ("[INFO]: SDP_MAXLEN_BECAREFUL is ignored. SDP will have length as max of bags.")
    else:
        if (not isinstance(self.Configs["ExampleGeneration"]["SDP_MAXLEN_BECAREFUL"], int)) or (self.Configs["ExampleGeneration"]["SDP_MAXLEN_BECAREFUL"] <= 1):
            self.PROGRAM_Halt ("SDP_MAXLEN_BECAREFUL in ExampleGeneration dictionary in the config file should be int and > 1.");

    if not "Directional_Dependency_Types" in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt ("Missing boolean Directional_Dependency_Types in ExampleGeneration dictionary in the config file.");
    else:
        if (not isinstance(self.Configs["ExampleGeneration"]["Directional_Dependency_Types"], bool)):
            self.PROGRAM_Halt ("Directional_Dependency_Types in ExampleGeneration dictionary in the config file should be either true or false.");

    if not "Use_General_prep_prepc_conj_DT" in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt ("Missing boolean Use_General_prep_prepc_conj_DT in ExampleGeneration dictionary in the config file.");
    else:
        if (not isinstance(self.Configs["ExampleGeneration"]["Use_General_prep_prepc_conj_DT"], bool)):
            self.PROGRAM_Halt ("Use_General_prep_prepc_conj_DT in ExampleGeneration dictionary in the config file should be either true or false.");
    """

    # ActionON_CrossSentenceExamples: (1)Halt (2)Discard
    if "ActionON_CrossSentenceExamples" not in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt("Missing string ActionON_CrossSentenceExamples in ExampleGeneration dictionary in the config file.")
    else:
        if not isinstance(self.Configs["ExampleGeneration"]["ActionON_CrossSentenceExamples"], unicode):
            self.PROGRAM_Halt("ActionON_CrossSentenceExamples in ExampleGeneration dictionary in the config file should be STRING and either 'Halt' or 'Discard'.")
        # Stored upper-cased so later comparisons are case-insensitive.
        self.Configs["ExampleGeneration"]["ActionON_CrossSentenceExamples"] = self.Configs["ExampleGeneration"]["ActionON_CrossSentenceExamples"].upper()
        if self.Configs["ExampleGeneration"]["ActionON_CrossSentenceExamples"] not in ["HALT", "DISCARD"]:
            self.PROGRAM_Halt("ActionON_CrossSentenceExamples in ExampleGeneration dictionary in the config file should be either 'Halt' or 'Discard'.")
        if self.Configs["ExampleGeneration"]["ActionON_CrossSentenceExamples"] == "DISCARD":
            self.lp(["*" * 40, "*" * 40, "*" * 40, "[WARNING]: DISCARDING ALL CROSS-SENTECE RELATIONS IF THERE IS ANY !!!", "*" * 40, "*" * 40, "*" * 40])

    # ActionON_MissingRelations: (1)Halt (2)GenerateAsNegatives
    if "ActionON_MissingRelations" not in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt("Missing string ActionON_MissingRelations in ExampleGeneration dictionary in the config file.")
    else:
        if not isinstance(self.Configs["ExampleGeneration"]["ActionON_MissingRelations"], unicode):
            self.PROGRAM_Halt("ActionON_MissingRelations in ExampleGeneration dictionary in the config file should be STRING and either 'Halt' or 'GenerateAsNegatives'.")
        self.Configs["ExampleGeneration"]["ActionON_MissingRelations"] = self.Configs["ExampleGeneration"]["ActionON_MissingRelations"].upper()
        if self.Configs["ExampleGeneration"]["ActionON_MissingRelations"] not in ["HALT", "GENERATEASNEGATIVES"]:
            self.PROGRAM_Halt("ActionON_MissingRelations in ExampleGeneration dictionary in the config file should be either 'Halt' or 'GenerateAsNegatives'.")
        if self.Configs["ExampleGeneration"]["ActionON_MissingRelations"] == "GENERATEASNEGATIVES":
            self.lp(["*" * 40, "*" * 40, "*" * 40, "[WARNING]: Missing Relations will be generated as Negatives !!!", "*" * 40, "*" * 40, "*" * 40])

    # ActionON_DuplicateRelations: (1)Halt (2)Ignore (3)Discard
    if "ActionON_DuplicateRelations" not in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt("Missing string ActionON_DuplicateRelations in ExampleGeneration dictionary in the config file.")
    else:
        if not isinstance(self.Configs["ExampleGeneration"]["ActionON_DuplicateRelations"], unicode):
            self.PROGRAM_Halt("ActionON_DuplicateRelations in ExampleGeneration dictionary in the config file should be string and either 'Halt', 'Ignore', or 'Discard'.")
        self.Configs["ExampleGeneration"]["ActionON_DuplicateRelations"] = self.Configs["ExampleGeneration"]["ActionON_DuplicateRelations"].upper()
        if self.Configs["ExampleGeneration"]["ActionON_DuplicateRelations"] not in ["HALT", "IGNORE", "DISCARD"]:
            self.PROGRAM_Halt("ActionON_DuplicateRelations in ExampleGeneration dictionary in the config file should be either 'Halt' or 'Ignore', or 'Discard'.")
        # The warning must look at the duplicate-relations option itself;
        # ActionON_MissingRelations can never be "DISCARD".
        if self.Configs["ExampleGeneration"]["ActionON_DuplicateRelations"] == "DISCARD":
            self.lp(["*" * 40, "*" * 40, "*" * 40, "[WARNING]: DISCARDING ANY DUPLICATE RELATIONS !!!", "*" * 40, "*" * 40, "*" * 40])

    # 4: ValidEnityTypesForRelations
    if "ValidEnityTypesForRelations" not in self.Configs:
        self.PROGRAM_Halt("Missing ValidEnityTypesForRelations dictionary in the config file.")
    # <<<CRITICAL>>> LOWER
    self.__DO_NOT_SET_ME["ValidEnityTypesForRelations"] = set(i.lower() for i in self.Configs["ValidEnityTypesForRelations"])
    OneHotEncodingForValidEnityTypesForRelations = {}
    for i, e_tp in enumerate(self.Configs["ValidEnityTypesForRelations"]):
        OneHotEncodingForValidEnityTypesForRelations[e_tp.lower()] = i
    self.__DO_NOT_SET_ME["OneHotEncodingForValidEnityTypesForRelations"] = OneHotEncodingForValidEnityTypesForRelations

    # 5: InteractionElementName
    if "InteractionElementName" not in self.Configs:
        self.PROGRAM_Halt("Missing InteractionElementName in the config file.")
    if self.Configs["InteractionElementName"] is None:
        self.PROGRAM_Halt("InteractionElementName in the config file should be either interaction or pair.")
    if self.Configs["InteractionElementName"].lower() not in ['interaction', 'pair']:
        self.PROGRAM_Halt("InteractionElementName in the config file should be either 'interaction' or 'pair'.")
    # <<<CRITICAL>>> LOWER
    self.__DO_NOT_SET_ME["InteractionElementName"] = self.Configs["InteractionElementName"].lower()

    # 6: InteractionElementClassAttributeName: in which attribute the class
    # for an interaction is given
    if "InteractionElementClassAttributeName" not in self.Configs:
        self.PROGRAM_Halt("Missing InteractionElementClassAttributeName in the config file.")
    if self.Configs["InteractionElementClassAttributeName"] is None:
        self.PROGRAM_Halt("InteractionElementClassAttributeName in the config file should be either 'interaction' or 'type'.")
    if self.Configs["InteractionElementClassAttributeName"].lower() not in ['interaction', 'type']:
        self.PROGRAM_Halt("InteractionElementClassAttributeName in the config file should be either 'interaction' or 'type'.")
    self.__DO_NOT_SET_ME["InteractionElementClassAttributeName"] = self.Configs["InteractionElementClassAttributeName"].lower()

    # 7: W2V
    if "W2V_Model" not in self.Configs:
        self.PROGRAM_Halt("Missing W2V_Model dictionary in the config file.")
    if "Model_Address" not in self.Configs["W2V_Model"]:
        self.PROGRAM_Halt("Missing Model_Address in W2V_Model dictionary in config file.")
    if "MaxWordsInMemory" not in self.Configs["W2V_Model"]:
        self.PROGRAM_Halt("Missing MaxWordsInMemory in W2V_Model dictionary in config file.")
    if not GF.FILE_CheckFileExists(self.Configs["W2V_Model"]["Model_Address"]):
        self.PROGRAM_Halt("File address for W2V_Model is not valid: file not exists.")

    # 8: Replace W2V vector of mentions with a better general W2V vector if
    # the mention is not found in the W2V model ...
    if "ReplaceVectorForEntityTypeIfTokenNotFound" in self.Configs["ExampleGeneration"]:
        VectorReplacementDict = {}
        # NOTE(review): the unpacking expects an iterable of
        # (entity_type, vector) pairs -- confirm the config layout.
        for Entity_Type, ReplacementVector in self.Configs["ExampleGeneration"]["ReplaceVectorForEntityTypeIfTokenNotFound"]:
            VectorReplacementDict[Entity_Type.lower()] = ReplacementVector
        if len(VectorReplacementDict) > 0:
            self.lp(["REPLACING VECTORS FOR SOME ENTITIY TYPES IF NOT FOUND IN W2V Model", str(VectorReplacementDict)])
        self.__DO_NOT_SET_ME["ExampleGeneration"]["ReplaceVectorForEntityTypeIfTokenNotFound_Dict"] = VectorReplacementDict
    else:
        self.__DO_NOT_SET_ME["ExampleGeneration"]["ReplaceVectorForEntityTypeIfTokenNotFound_Dict"] = {}

    # 9: Check if ValidInteractingPairEntityTypes is given ...
    if "ValidInteractingPairEntityTypes" not in self.Configs:
        self.PROGRAM_Halt("Missing ValidInteractingPairEntityTypes list in the config file.")
    if self.Configs["ValidInteractingPairEntityTypes"] is None:
        self.PROGRAM_Halt("ValidInteractingPairEntityTypes list is null in the config file.")
    if len(self.Configs["ValidInteractingPairEntityTypes"]) < 1:
        self.PROGRAM_Halt("ValidInteractingPairEntityTypes in the config file should be list with at least one pair.")
    ListOf_ValidInteractingPairEntityTypes = []
    for EntityTypePair in self.Configs["ValidInteractingPairEntityTypes"]:
        first_tp = EntityTypePair[0].lower()
        second_tp = EntityTypePair[1].lower()
        # Compare lower-cased names against the lower-cased set built in
        # section 4, not against the raw (possibly mixed-case) config list.
        if (first_tp not in self.__DO_NOT_SET_ME["ValidEnityTypesForRelations"]) or \
           (second_tp not in self.__DO_NOT_SET_ME["ValidEnityTypesForRelations"]):
            self.PROGRAM_Halt("Invalid entity pairs for section ValidInteractingPairEntityTypes:" + str(EntityTypePair))
        ListOf_ValidInteractingPairEntityTypes.append([first_tp, second_tp])
        # Important: the reversed pair is deliberately NOT added any more.
        # Only the original order is preserved, so that negatives can be
        # created according to this order for now.
        # ListOf_ValidInteractingPairEntityTypes.append ([EntityTypePair[1].lower(),EntityTypePair[0].lower()]);
    self.__DO_NOT_SET_ME["ListOf_ValidInteractingPairEntityTypes"] = ListOf_ValidInteractingPairEntityTypes

    # 10: Can interact with itself: for example self-interacting proteins or drug ...
    if "SelfInteractingEntities" not in self.Configs:
        self.PROGRAM_Halt("Missing SelfInteractingEntities list in the config file. Set it to 'null' if you need nothing.")
    if self.Configs["SelfInteractingEntities"] is None:
        self.__DO_NOT_SET_ME["SelfInteractingEntities"] = []
    else:
        self_interacting = [i.lower() for i in self.Configs["SelfInteractingEntities"]]
        for i in self_interacting:
            if i not in self.__DO_NOT_SET_ME["ValidEnityTypesForRelations"]:
                self.PROGRAM_Halt("A self-interacting entity type is defined in section SelfInteractingEntities, but not in ValidEnityTypesForRelations in Config file!")
            if [i, i] not in self.__DO_NOT_SET_ME["ListOf_ValidInteractingPairEntityTypes"]:
                self.PROGRAM_Halt("A self-interacting entity type is defined in section SelfInteractingEntities, but no relation in ValidInteractingPairEntityTypes in Config file!")
        self.__DO_NOT_SET_ME["SelfInteractingEntities"] = self_interacting

    # 11: RemoveSentenceIfNoParseExists
    if "RemoveSentenceIfNoParseExists" not in self.Configs:
        self.PROGRAM_Halt("Missing boolean RemoveSentenceIfNoParseExists in the config file.")
    if not isinstance(self.Configs["RemoveSentenceIfNoParseExists"], bool):
        self.PROGRAM_Halt("RemoveSentenceIfNoParseExists in the config file should be either true or false.")

    # 12: Max sentence length (null means "no limit" and is stored as -1)
    if "MAX_SENTENCE_LENGTH" not in self.Configs["ExampleGeneration"]:
        self.PROGRAM_Halt("MAX_SENTENCE_LENGTH in ExampleGeneration should be either null or an integer bigger than zero in the config file.")
    max_sentence_length = self.Configs["ExampleGeneration"]["MAX_SENTENCE_LENGTH"]
    if max_sentence_length is None:
        self.Configs["ExampleGeneration"]["MAX_SENTENCE_LENGTH"] = -1
    elif isinstance(max_sentence_length, bool) or (not isinstance(max_sentence_length, int)) or (not max_sentence_length > 0):
        # bool is a subclass of int, so it has to be rejected explicitly.
        self.PROGRAM_Halt("MAX_SENTENCE_LENGTH in ExampleGeneration should be either null or an integer bigger than zero in the config file.")

    # 13: Runtime parameters
    if "ExecutionParameters" not in self.Configs:
        self.PROGRAM_Halt("Missing dictionary ExecutionParameters in the config file.")
    if "DoNotAskAnyQuestions" not in self.Configs["ExecutionParameters"]:
        self.PROGRAM_Halt("Missing boolean DoNotAskAnyQuestions in ExecutionParameters dictionary in the config file.")
    if not isinstance(self.Configs["ExecutionParameters"]["DoNotAskAnyQuestions"], bool):
        self.PROGRAM_Halt("DoNotAskAnyQuestions in ExecutionParameters dictionary in the config file should be true/false.")

    # 14: Evaluation parameters
    if "EvaluationParameters" not in self.Configs:
        self.PROGRAM_Halt("Missing dictionary EvaluationParameters in the config file.")
    if "ExcludeClassLabelsList" not in self.Configs["EvaluationParameters"]:
        self.PROGRAM_Halt("Missing list ExcludeClassLabelsList in EvaluationParameters dictionary in the config file.")
    if not isinstance(self.Configs["EvaluationParameters"]["ExcludeClassLabelsList"], list):
        self.PROGRAM_Halt("ExcludeClassLabelsList in EvaluationParameters dictionary in the config file should be a list. Put [] if you don't want to exclude anything.")
    self.Configs["EvaluationParameters"]["ExcludeClassLabelsList"] = [i.lower() for i in self.Configs["EvaluationParameters"]["ExcludeClassLabelsList"]]
    if len(self.Configs["EvaluationParameters"]["ExcludeClassLabelsList"]) > 0:
        # The excluded labels were lower-cased above, so they are checked
        # against the lower-cased class sets built in section 1.
        ALL_CLASSES = self.__DO_NOT_SET_ME["CLASSES"]["Positive"] | self.__DO_NOT_SET_ME["CLASSES"]["Negative"]
        for i in self.Configs["EvaluationParameters"]["ExcludeClassLabelsList"]:
            if i not in ALL_CLASSES:
                self.PROGRAM_Halt(i + " class label is defined in [EvaluationParameters][ExcludeClassLabelsList] in the config file but not defined as a valid class in the CLASSES dictionary.")

    if "DecimalPoints" not in self.Configs["EvaluationParameters"]:
        self.PROGRAM_Halt("Missing integer DecimalPoints in EvaluationParameters dictionary in the config file.")
    if not isinstance(self.Configs["EvaluationParameters"]["DecimalPoints"], int):
        self.PROGRAM_Halt("DecimalPoints in EvaluationParameters dictionary in the config file should be a positive integer. Put 2 for default.")
    if self.Configs["EvaluationParameters"]["DecimalPoints"] <= 0:
        self.PROGRAM_Halt("DecimalPoints in EvaluationParameters dictionary in the config file should be a positive integer. Put 2 for default.")