def generate_search_interface_markup(self, dict_repr):
    """
    Render the search interface component as markup.

    The 'base_files/interface.xml' template is filled with the interface
    class and the concatenated markup of its attributes; each attribute is
    rendered through the 'base_files/attribute.xml' template.

    See: https://github.com/leifos/simiir/blob/master/simiir/sim_config_generator/sim_config_generator.py
    """
    interface_template = read_file_to_string('base_files/interface.xml')
    search_interface = dict_repr['simulation']['searchInterface']

    rendered_attributes = ''
    for attr in search_interface['attributes']:
        attr_template = read_file_to_string('base_files/attribute.xml')
        rendered = attr_template.format(
            attr['@name'],
            attr['@type'],
            attr['@value'],
            attr['@is_argument'])
        rendered_attributes = '{0}{1}'.format(rendered_attributes, rendered)

    return interface_template.format(
        search_interface['class'], rendered_attributes)
def prepareUserPreRolledFiles(self):
    """
    Replicate every user configuration file once per run (used with the -u flag).

    For each path in ``self.list_of_UserConfigs`` and each run number from 1
    to ``self.numOfRuns``, the ``{9}`` placeholder in the file is replaced by
    the run number (selecting preRolled1.mark, preRolled2.mark, ...). The
    result is parsed as XML and written pretty-printed next to the source
    file as ``<first>-<second>_ID-<n>.xml``, where ``<first>-<second>`` are
    the first two '-'-separated parts of the source file name and ``<n>`` is
    a counter that increases with every file written. Each new path is
    appended to ``self.listOfRefinedUserConfig``.

    Raises IndexError when a source file name contains no '-'.
    """
    # Nothing to replicate; also keeps the per-file work below from running.
    if self.numOfRuns < 1:
        return

    runId = 1
    for userConfPath in self.list_of_UserConfigs:
        # Everything that depends only on the source path is computed once
        # per user file instead of once per run.
        fileData = read_file_to_string(userConfPath)
        nameParts = os.path.basename(userConfPath).split('-')
        fileStem = nameParts[0] + '-' + nameParts[1]
        baseDirect = os.path.dirname(userConfPath)

        for run in range(1, self.numOfRuns + 1):
            # Point this replica at the pre-rolled judgment file of its run
            xmlData = xml.dom.minidom.parseString(
                fileData.replace('{9}', str(run)))
            newFileName = os.path.join(
                baseDirect, fileStem + '_ID-' + str(runId) + '.xml')

            # The context manager closes the file; no explicit close needed
            with open(newFileName, "w") as f:
                f.write(xmlData.toprettyxml())

            self.listOfRefinedUserConfig.append(newFileName)
            runId += 1
def prepareUserPreRolledFiles(self):
    """
    Create one copy of each user configuration file per run (-u flag).

    Every file listed in ``self.list_of_UserConfigs`` is read once. For run
    numbers 1..``self.numOfRuns`` its ``{9}`` placeholder is replaced with
    the run number (preRolled1.mark, preRolled2.mark, ...), the text is
    parsed as XML, and the pretty-printed document is written to the same
    directory under the name ``<first>-<second>_ID-<n>.xml``.
    ``<first>-<second>`` are the first two '-'-separated pieces of the
    original file name; ``<n>`` counts all files written so far, starting
    at 1. The written paths are appended to
    ``self.listOfRefinedUserConfig`` in creation order.

    Raises IndexError when an original file name has no '-' in it.
    """
    if self.numOfRuns < 1:
        # No runs requested: leave the file system and the list untouched
        return

    runId = 1
    for userConfPath in self.list_of_UserConfigs:
        # Loop-invariant per source file: contents, target directory, stem
        template = read_file_to_string(userConfPath)
        targetDir = os.path.dirname(userConfPath)
        pieces = os.path.basename(userConfPath).split('-')
        stem = pieces[0] + '-' + pieces[1]

        for runNumber in range(1, self.numOfRuns + 1):
            document = xml.dom.minidom.parseString(
                template.replace('{9}', str(runNumber)))
            newFileName = os.path.join(
                targetDir, stem + '_ID-' + str(runId) + '.xml')

            # ``with`` already closes the handle on exit
            with open(newFileName, "w") as f:
                f.write(document.toprettyxml())

            self.listOfRefinedUserConfig.append(newFileName)
            runId += 1
def generate_search_interface_markup(self, dict_repr):
    """
    Return the markup of the search interface component.

    Each entry of the interface's 'attributes' is rendered with the
    'base_files/attribute.xml' template; the pieces are concatenated and
    inserted, together with the interface class, into the
    'base_files/interface.xml' template.

    See: https://github.com/leifos/simiir/blob/master/simiir/sim_config_generator/sim_config_generator.py
    """
    def render_attribute(attribute):
        # One attribute -> one filled-in attribute template
        template = read_file_to_string('base_files/attribute.xml')
        return template.format(
            attribute['@name'],
            attribute['@type'],
            attribute['@value'],
            attribute['@is_argument'])

    interface_markup = read_file_to_string('base_files/interface.xml')

    combined = ''
    for attribute in dict_repr['simulation']['searchInterface']['attributes']:
        combined = '{0}{1}'.format(combined, render_attribute(attribute))

    interface_class = dict_repr['simulation']['searchInterface']['class']
    return interface_markup.format(interface_class, combined)
def generate_user_entries(self):
    """
    Build the XML user entries for the simulation file.

    One 'base_files/user_entry.xml' template is filled in per path in
    ``self.userConfigPaths``; the filled templates are concatenated in
    order and returned as a single string.

    See: https://github.com/leifos/simiir/blob/master/simiir/sim_config_generator/sim_config_generator.py
    """
    combined_entries = ""
    for config_path in self.userConfigPaths:
        entry_template = read_file_to_string('base_files/user_entry.xml')
        rendered_entry = entry_template.format(config_path)
        combined_entries = "{0}{1}".format(combined_entries, rendered_entry)
    return combined_entries
def create_attribute_markup(self, attribute_dict):
    """
    Return the XML markup for a single attribute.

    ``attribute_dict`` supplies '@name', '@type', '@value' and
    '@is_argument'. Any '[[ base_dir ]]' marker inside the value is replaced
    by ``self.simulationBaseDir`` before the 'base_files/attribute.xml'
    template is filled in.

    See: https://github.com/leifos/simiir/blob/master/simiir/sim_config_generator/sim_config_generator.py
    """
    template = read_file_to_string('base_files/attribute.xml')
    resolved_value = attribute_dict['@value'].replace(
        '[[ base_dir ]]', self.simulationBaseDir)
    fields = (
        attribute_dict['@name'],
        attribute_dict['@type'],
        resolved_value,
        attribute_dict['@is_argument'],
    )
    return template.format(*fields)
def create_attribute_markup(self, attribute_dict):
    """
    Turn a dictionary describing an attribute into its XML markup.

    The '[[ base_dir ]]' marker in the '@value' entry is substituted with
    ``self.simulationBaseDir``; the result, together with '@name', '@type'
    and '@is_argument', fills the 'base_files/attribute.xml' template.

    See: https://github.com/leifos/simiir/blob/master/simiir/sim_config_generator/sim_config_generator.py
    """
    markup_template = read_file_to_string('base_files/attribute.xml')

    # Expand the base-directory marker before the value goes into the markup
    raw_value = attribute_dict['@value']
    expanded_value = raw_value.replace('[[ base_dir ]]', self.simulationBaseDir)

    return markup_template.format(
        attribute_dict['@name'],
        attribute_dict['@type'],
        expanded_value,
        attribute_dict['@is_argument'])
def generate_markup_simulationFiles(self):
    """
    Build and write one simulation configuration file per entry of
    ``self.simulationPermutations``.

    For every permutation the component classes and attribute markup are
    collected, the retrieval model is read from the 'model' attribute of the
    ``searchInterface`` entry with the same index in ``self.dictRepr``, and
    the 'base_files/simulation.xml' template is filled in and written to
    ``<self.simulationBaseDir>/trec_<model>_simulation-<index>.xml``.

    Nothing is returned; the list of written paths is stored in
    ``self.simulConfigPaths``.

    Raises KeyError when the 'model' value is not one of '0', '1', '2', '-1'.
    """
    retrModels = {'0': 'TFIDF', '1': 'BM25', '2': 'PL2', '-1': ''}
    simulation_files = []

    for i, iteration in enumerate(self.simulationPermutations):
        simulation_markup_components = {
            'id': None,
            'searchInterface': {
                'class': None,
                'attributes': None,
                'attributes_py': None
            }
        }

        # Extract the components for the simulation configuration of this
        # iteration
        for component in iteration:
            slot = simulation_markup_components[component['type']]
            slot['class'] = component['@name']
            if 'attribute' not in component:
                continue
            component_attributes = component['attribute']
            slot['attributes_py'] = component_attributes
            # isinstance also accepts dict subclasses (e.g. ordered dicts),
            # which would otherwise be iterated key by key below
            if isinstance(component_attributes, dict):
                slot['attributes'] = self.create_attribute_markup(
                    component_attributes)
            else:
                for attribute in component_attributes:
                    if slot['attributes'] is None:
                        slot['attributes'] = ""
                    slot['attributes'] = "{0}{1}".format(
                        slot['attributes'],
                        self.create_attribute_markup(attribute))

        # Find the retrieval model. The default must be the *string* '-1'
        # because the lookup table is keyed by strings.
        interface_attributes = self.dictRepr['simulationConfiguration'][
            'searchInterface'][i]['attribute']
        if isinstance(interface_attributes, dict):
            # A lone attribute is a dict rather than a list of dicts
            interface_attributes = [interface_attributes]
        retrModelVal = '-1'
        for comp in interface_attributes:
            if comp['@name'] == 'model':
                retrModelVal = comp['@value']
        retrModel = retrModels[retrModelVal]

        # Generate users and topics. With the -u flag the users are filled
        # into the simulation configuration files at a later stage, so the
        # '{3}' placeholder is kept.
        if self.userFlag == '-u':
            users = "{3}"
        else:
            users = self.generate_user_entries()
        topics = self.generate_topics()

        # Construct the simulation configuration markup
        interface = simulation_markup_components['searchInterface']
        simulation_markup = read_file_to_string('base_files/simulation.xml')
        simulation_markup = simulation_markup.format(
            'trec_{0}_simulation-{1}'.format(retrModel, str(i)),
            os.path.join(self.simulationBaseDir, '{0}/{1}/{2}'),
            topics,
            users,
            interface['class'],
            interface['attributes']
            if interface['attributes'] is not None else "")

        # Create the simulation configuration file
        fileName = '/trec_{0}_simulation-{1}.xml'.format(retrModel, str(i))
        filePath = self.simulationBaseDir + fileName
        simulation_files.append(filePath)
        if not os.path.exists(os.path.dirname(filePath)):
            os.makedirs(os.path.dirname(filePath))
        # The context manager closes the file; no explicit close needed
        with open(filePath, "w") as user_file:
            user_file.write(simulation_markup)

    self.simulConfigPaths = simulation_files
def generate_markup_userFiles(self):
    """
    Build and write one user configuration file per entry of
    ``self.userPermutations``.

    ``self.simulationBaseDir`` is set to the absolute form of the '@baseDir'
    attribute of ``self.dictRepr['simulationConfiguration']``. For every
    permutation the component classes and attribute markup are collected,
    the stopping decision maker and query generator classes are checked
    against ``get_available_classes`` (the process exits with status 2 and a
    printed message when one is unknown), and the 'base_files/user.xml'
    template is filled in and written to
    ``<self.simulationBaseDir>/user_<queryGenerator>_<stoppingDecisionMaker>-<value>-<n>.xml``.
    ``<value>`` is the first ``value="..."`` found in the stopping decision
    maker's attribute markup and ``<n>`` counts permutations from 1.

    Nothing is returned; the list of written paths is stored in
    ``self.userConfigPaths``.
    """
    user_files = []
    baseDir = self.dictRepr['simulationConfiguration']['@baseDir']
    # abspath strips trailing separators and resolves '.'/'..', which the
    # former dirname + last-segment recombination got wrong
    self.simulationBaseDir = os.path.abspath(baseDir)
    componentsPath = os.path.join(self.simiirPath, 'simiir')
    componentNames = ('queryGenerator', 'snippetClassifier',
                      'documentClassifier', 'stoppingDecisionMaker',
                      'logger', 'searchContext')

    for j, iteration in enumerate(self.userPermutations, 1):
        user_markup_components = {'id': None}
        for name in componentNames:
            user_markup_components[name] = {
                'class': None,
                'attributes': None,
                'attributes_py': None
            }

        # Extract the components for the user configuration of this iteration
        for component in iteration:
            slot = user_markup_components[component['type']]
            slot['class'] = component['@name']
            if 'attribute' not in component:
                continue
            component_attributes = component['attribute']
            slot['attributes_py'] = component_attributes
            # isinstance also accepts dict subclasses (e.g. ordered dicts),
            # which would otherwise be iterated key by key below
            if isinstance(component_attributes, dict):
                slot['attributes'] = self.create_attribute_markup(
                    component_attributes)
            else:
                for attribute in component_attributes:
                    if slot['attributes'] is None:
                        slot['attributes'] = ""
                    slot['attributes'] = "{0}{1}".format(
                        slot['attributes'],
                        self.create_attribute_markup(attribute))

        stoppingClass = user_markup_components['stoppingDecisionMaker']['class']
        queryClass = user_markup_components['queryGenerator']['class']

        # Validate before deriving the file name, so a mistyped class is
        # reported with the message below rather than a later failure.
        # Validate Stopping Decision Maker Component
        if stoppingClass not in get_available_classes(
                componentsPath, 'stopping_decision_makers'):
            print('The Stopping Decision Maker component ' + stoppingClass +
                  ' does not exist. Please ensure that you have typed the name correctly!')
            sys.exit(2)
        # Validate Query Generator Component
        if queryClass not in get_available_classes(
                componentsPath, 'query_generators'):
            print('The Query Generator component ' + queryClass +
                  ' does not exist. Please ensure that you have typed the name correctly!')
            sys.exit(2)

        # File name includes queryGenerator_stoppingStrategy-value
        stoppingAttributes = user_markup_components['stoppingDecisionMaker'][
            'attributes']
        stoppingValue = stoppingAttributes.split('value="')[1].split('"')[0]
        fileName = '/user_{0}_{1}-{2}-{3}.xml'.format(
            queryClass, stoppingClass, stoppingValue, str(j))
        # The user id is the file name without '/user_' and '.xml'
        user_base_id = fileName[6:-4]

        # Prepare the string markup for the user configuration: the id,
        # then (class, attributes) for each component in template order
        format_args = [user_base_id]
        for name in componentNames:
            entry = user_markup_components[name]
            format_args.append(entry['class'])
            format_args.append(
                entry['attributes'] if entry['attributes'] is not None else "")
        user_markup = read_file_to_string('base_files/user.xml')
        user_markup = user_markup.format(*format_args)

        # Create the user configuration file
        filePath = self.simulationBaseDir + fileName
        user_files.append(filePath)
        if not os.path.exists(os.path.dirname(filePath)):
            os.makedirs(os.path.dirname(filePath))
        # The context manager closes the file; no explicit close needed
        with open(filePath, "w") as user_file:
            user_file.write(user_markup)

    self.userConfigPaths = user_files
def generate_topic_markup(entry):
    """
    Return the markup for one topic.

    The 'base_files/topic.xml' template is filled with the '@id',
    '@filename' and '@qrelsFilename' values of ``entry``, in that order.
    """
    template = read_file_to_string('base_files/topic.xml')
    fields = (entry['@id'], entry['@filename'], entry['@qrelsFilename'])
    return template.format(*fields)
def generate_markup_simulationFiles(self):
    """
    Generate the markup for, and write, one simulation configuration file
    for each entry of ``self.simulationPermutations``.

    Per permutation: the component classes and their attribute markup are
    gathered; the retrieval model name is looked up from the 'model'
    attribute of the same-index ``searchInterface`` entry of
    ``self.dictRepr``; users and topics markup is produced; and the
    'base_files/simulation.xml' template is filled and saved as
    ``<self.simulationBaseDir>/trec_<model>_simulation-<index>.xml``.

    The method returns nothing. The full paths of the written files are
    stored in ``self.simulConfigPaths``.

    Raises KeyError when the 'model' value is not one of '0', '1', '2', '-1'.
    """
    retrModels = {'0': 'TFIDF', '1': 'BM25', '2': 'PL2', '-1': ''}
    simulation_files = []

    # i counts iterations and indexes the searchInterface entries
    i = 0
    for iteration in self.simulationPermutations:
        simulation_markup_components = {
            'id': None,
            'searchInterface': {'class': None,
                                'attributes': None,
                                'attributes_py': None}
        }

        # Extract the components for this iteration's configuration
        for component in iteration:
            target = simulation_markup_components[component['type']]
            target['class'] = component['@name']
            if 'attribute' in component:
                component_attributes = component['attribute']
                target['attributes_py'] = component_attributes
                # A single attribute arrives as one mapping, several as a
                # sequence of mappings; isinstance covers dict subclasses too
                if isinstance(component_attributes, dict):
                    target['attributes'] = self.create_attribute_markup(
                        component_attributes)
                else:
                    for attribute in component_attributes:
                        if target['attributes'] is None:
                            target['attributes'] = ""
                        target['attributes'] = "{0}{1}".format(
                            target['attributes'],
                            self.create_attribute_markup(attribute))

        # Find the retrieval model; default to the string key '-1' (empty
        # model name) when no 'model' attribute is present. An integer -1
        # would not be found in the string-keyed table.
        modelSource = self.dictRepr['simulationConfiguration'][
            'searchInterface'][i]['attribute']
        if isinstance(modelSource, dict):
            modelSource = [modelSource]
        retrModelVal = '-1'
        for comp in modelSource:
            if comp['@name'] == 'model':
                retrModelVal = comp['@value']
        retrModel = retrModels[retrModelVal]

        # Generate users and topics. If the user flag is set, the users are
        # added to the simulation configuration files at a later stage and
        # the '{3}' placeholder is left in place.
        if self.userFlag == '-u':
            users = "{3}"
        else:
            users = self.generate_user_entries()
        topics = self.generate_topics()

        # Construct the simulation configuration markup
        interfaceClass = simulation_markup_components['searchInterface']['class']
        interfaceAttributes = simulation_markup_components['searchInterface'][
            'attributes']
        if interfaceAttributes is None:
            interfaceAttributes = ""
        simulation_markup = read_file_to_string('base_files/simulation.xml')
        simulation_markup = simulation_markup.format(
            'trec_{0}_simulation-{1}'.format(retrModel, str(i)),
            os.path.join(self.simulationBaseDir, '{0}/{1}/{2}'),
            topics,
            users,
            interfaceClass,
            interfaceAttributes)

        # Create the simulation configuration file
        fileName = '/trec_{0}_simulation-{1}.xml'.format(retrModel, str(i))
        i = i + 1
        filePath = self.simulationBaseDir + fileName
        simulation_files.append(filePath)
        targetDir = os.path.dirname(filePath)
        if not os.path.exists(targetDir):
            os.makedirs(targetDir)
        # ``with`` closes the handle on exit
        with open(filePath, "w") as user_file:
            user_file.write(simulation_markup)

    self.simulConfigPaths = simulation_files
def generate_markup_userFiles(self):
    """
    Generate the markup for, and write, one user configuration file for
    each entry of ``self.userPermutations``.

    ``self.simulationBaseDir`` is set to the absolute path of the '@baseDir'
    attribute of ``self.dictRepr['simulationConfiguration']``. Per
    permutation: component classes and attribute markup are gathered; the
    stopping decision maker and query generator classes are checked with
    ``get_available_classes`` (an unknown class prints a message and exits
    with status 2); and the 'base_files/user.xml' template is filled and
    saved as
    ``<self.simulationBaseDir>/user_<queryGenerator>_<stoppingDecisionMaker>-<value>-<n>.xml``,
    where ``<value>`` is the first ``value="..."`` in the stopping decision
    maker's attribute markup and ``<n>`` is the 1-based permutation count.

    The method returns nothing. The full paths of the written files are
    stored in ``self.userConfigPaths``.
    """
    user_files = []
    baseDir = self.dictRepr['simulationConfiguration']['@baseDir']
    # One call covers both the trailing-'/' and the plain case, and also
    # resolves '.', '..' and repeated separators correctly
    self.simulationBaseDir = os.path.abspath(baseDir)
    componentsPath = os.path.join(self.simiirPath, 'simiir')

    # Order matches the placeholders of base_files/user.xml after the id
    templateOrder = ['queryGenerator', 'snippetClassifier',
                     'documentClassifier', 'stoppingDecisionMaker',
                     'logger', 'searchContext']

    j = 1
    for iteration in self.userPermutations:
        user_markup_components = {'id': None}
        for componentName in templateOrder:
            user_markup_components[componentName] = {
                'class': None, 'attributes': None, 'attributes_py': None}

        # Extract the components for this iteration's user configuration
        for component in iteration:
            target = user_markup_components[component['type']]
            target['class'] = component['@name']
            if 'attribute' in component:
                component_attributes = component['attribute']
                target['attributes_py'] = component_attributes
                # A single attribute arrives as one mapping, several as a
                # sequence of mappings; isinstance covers dict subclasses too
                if isinstance(component_attributes, dict):
                    target['attributes'] = self.create_attribute_markup(
                        component_attributes)
                else:
                    for attribute in component_attributes:
                        if target['attributes'] is None:
                            target['attributes'] = ""
                        target['attributes'] = "{0}{1}".format(
                            target['attributes'],
                            self.create_attribute_markup(attribute))

        queryGenerator = user_markup_components['queryGenerator']
        stoppingMaker = user_markup_components['stoppingDecisionMaker']

        # Checks run first so that a mistyped class name produces the
        # messages below instead of failing while the file name is built.
        # Validate Stopping Decision Maker Component
        knownStopping = get_available_classes(
            componentsPath, 'stopping_decision_makers')
        if stoppingMaker['class'] not in knownStopping:
            print('The Stopping Decision Maker component ' +
                  stoppingMaker['class'] +
                  ' does not exist. Please ensure that you have typed the name correctly!')
            sys.exit(2)
        # Validate Query Generator Component
        knownGenerators = get_available_classes(
            componentsPath, 'query_generators')
        if queryGenerator['class'] not in knownGenerators:
            print('The Query Generator component ' +
                  queryGenerator['class'] +
                  ' does not exist. Please ensure that you have typed the name correctly!')
            sys.exit(2)

        # File name includes queryGenerator_stoppingStrategy-value
        fileName = '/user_{0}_{1}-{2}-{3}.xml'.format(
            queryGenerator['class'],
            stoppingMaker['class'],
            stoppingMaker['attributes'].split('value="')[1].split('"')[0],
            str(j))
        j = j + 1
        # Extract the user id from the file name: drop '/user_' and '.xml'
        user_base_id = fileName[6:-4]

        # Prepare the markup: id first, then class and attribute markup
        # (empty string when absent) of every component
        values = [user_base_id]
        for componentName in templateOrder:
            component_entry = user_markup_components[componentName]
            attributes = component_entry['attributes']
            values.append(component_entry['class'])
            values.append(attributes if attributes is not None else "")
        user_markup = read_file_to_string('base_files/user.xml')
        user_markup = user_markup.format(*values)

        # Create the user configuration file
        filePath = self.simulationBaseDir + fileName
        user_files.append(filePath)
        targetDir = os.path.dirname(filePath)
        if not os.path.exists(targetDir):
            os.makedirs(targetDir)
        # ``with`` closes the handle on exit
        with open(filePath, "w") as user_file:
            user_file.write(user_markup)

    self.userConfigPaths = user_files
def prepareConfigFile(self):
    """
    Replicate each simulation configuration once per run.

    For every path in ``self.list_of_SimConfigs`` and every run number from
    1 to ``self.numOfRuns``, the placeholders '{0}', '{1}' and '{2}' in the
    file are replaced by 'output', the file stem and the run number. The
    output directory ``<dir>/output/<stem>/<run>`` is created if missing and
    the pretty-printed XML is written to ``<dir>/<stem>_Run-<run>.xml``.
    Every written path is appended to ``self.listOfRuns``.

    When ``self.flag`` is '-u', ``prepareUserPreRolledFiles`` is called
    first and the '{3}' placeholder of each run is replaced by user entries
    built from ``self.listOfRefinedUserConfig``.
    """
    # With the flag set, create the users with pre-rolled qrels files first
    if self.flag == '-u':
        self.prepareUserPreRolledFiles()

    # Replicate the base simulation configuration files once per run
    for simConfigPath in self.list_of_SimConfigs:
        fileData = read_file_to_string(simConfigPath)
        lastSegment = simConfigPath.split('/')[-1]
        # File stem: last path segment without its 4-character extension
        fileName = lastSegment[:-4]
        # Directory prefix including the trailing '/'. Slicing off the last
        # segment stays correct when the stem also occurs earlier in the
        # path, where splitting on the stem would cut the path short.
        baseDir = simConfigPath[:len(simConfigPath) - len(lastSegment)]

        # Running count of user entries emitted for this configuration
        counter = 1
        userConfStratPointer = 0
        for currRun in range(1, self.numOfRuns + 1):
            # Fill in the output location of the current run
            currRun_fileData = fileData.replace('{0}', 'output')
            currRun_fileData = currRun_fileData.replace('{1}', fileName)
            currRun_fileData = currRun_fileData.replace('{2}', str(currRun))

            # With the flag set, add this run's share of the user
            # configurations (run 1 -> the preRolled1 replicas, ...)
            if self.flag == '-u':
                entry_list = ""
                withinUserConfPointer = 0
                # Floor division keeps the integer arithmetic of Python 2
                # when the code runs under Python 3
                usersPerRun = len(self.listOfRefinedUserConfig) // self.numOfRuns
                while counter <= usersPerRun * currRun:
                    user_markup = read_file_to_string('base_files/user_entry.xml')
                    entry_list = "{0}{1}".format(
                        entry_list,
                        user_markup.replace(
                            '{0}',
                            self.listOfRefinedUserConfig[
                                userConfStratPointer + withinUserConfPointer]))
                    counter = counter + 1
                    # Step to the same run's replica of the next user
                    withinUserConfPointer = withinUserConfPointer + self.numOfRuns
                currRun_fileData = currRun_fileData.replace('{3}', entry_list)
                userConfStratPointer += 1

            xmlData = xml.dom.minidom.parseString(currRun_fileData)
            newBaseDir = os.path.join(
                baseDir, 'output/' + fileName + '/' + str(currRun))
            newFileName = fileName + '_Run-' + str(currRun) + '.xml'

            # Create the output directory if needed, then write the file
            if not os.path.exists(newBaseDir):
                os.makedirs(newBaseDir)
            with open(baseDir + newFileName, "w") as f:
                f.write(xmlData.toprettyxml())

            self.listOfRuns.append(baseDir + newFileName)
def prepareConfigFile(self):
    """
    Create the per-run copies of every simulation configuration file.

    Each path in ``self.list_of_SimConfigs`` is read once. For run numbers
    1..``self.numOfRuns`` the placeholders '{0}', '{1}' and '{2}' are
    replaced with 'output', the file stem and the run number; the directory
    ``<dir>/output/<stem>/<run>`` is created when it does not exist; and the
    pretty-printed XML is saved as ``<dir>/<stem>_Run-<run>.xml``. The saved
    paths are appended to ``self.listOfRuns``.

    If ``self.flag`` equals '-u', ``prepareUserPreRolledFiles`` runs first
    and each run's '{3}' placeholder is replaced with user entries taken
    from ``self.listOfRefinedUserConfig``.
    """
    useRefinedUsers = self.flag == '-u'
    if useRefinedUsers:
        self.prepareUserPreRolledFiles()

    for simConfigPath in self.list_of_SimConfigs:
        fileData = read_file_to_string(simConfigPath)

        # Split the path into its directory prefix (trailing '/' kept) and
        # its last segment by position. Splitting the path on the stem
        # would stop at the first occurrence of the stem, which may be a
        # directory name, and fails on an empty stem.
        lastSegment = simConfigPath.split('/')[-1]
        baseDir = simConfigPath[:len(simConfigPath) - len(lastSegment)]
        # Stem = last segment minus its 4-character extension
        fileName = lastSegment[:-4]

        counter = 1
        currRun = 1
        userConfStratPointer = 0
        while currRun <= self.numOfRuns:
            currRun_fileData = (fileData
                                .replace('{0}', 'output')
                                .replace('{1}', fileName)
                                .replace('{2}', str(currRun)))

            if useRefinedUsers:
                # Each run receives an equal share of the refined user
                # configurations; ``//`` keeps this an integer on Python 3
                share = len(self.listOfRefinedUserConfig) // self.numOfRuns
                entry_list = ""
                withinUserConfPointer = 0
                while counter <= share * currRun:
                    userPath = self.listOfRefinedUserConfig[
                        userConfStratPointer + withinUserConfPointer]
                    user_markup = read_file_to_string(
                        'base_files/user_entry.xml')
                    entry_list = "{0}{1}".format(
                        entry_list, user_markup.replace('{0}', userPath))
                    counter += 1
                    # Jump to the next user's replica for this same run
                    withinUserConfPointer += self.numOfRuns
                currRun_fileData = currRun_fileData.replace(
                    '{3}', entry_list)
                userConfStratPointer += 1

            xmlData = xml.dom.minidom.parseString(currRun_fileData)
            newBaseDir = os.path.join(
                baseDir, 'output/' + fileName + '/' + str(currRun))
            newFilePath = baseDir + fileName + '_Run-' + str(currRun) + '.xml'

            # Make sure the run's output directory exists before the
            # configuration that points to it is written
            if not os.path.exists(newBaseDir):
                os.makedirs(newBaseDir)
            with open(newFilePath, "w") as f:
                f.write(xmlData.toprettyxml())

            self.listOfRuns.append(newFilePath)
            currRun += 1