class MDLR2MDL(object): ''' given an mdlr definition this script will get a set of mdl files compatible with an nfsim bng-xml definition ''' def __init__(self, configpath): with open(configpath, 'r') as f: self.config = yaml.load(f.read()) try: self.nfsim = NFSim( os.path.join(self.config['libpath'], 'libnfsim_c.so')) except OSError: print("Cannot open libnfsim_c.so. Please check libpath in " "mcellr.yaml") sys.exit(0) def process_mdlr(self, mdlrPath): ''' main method. extracts species definition, creates bng-xml, creates mdl definitions ''' try: nauty_dict = self.xml2hnauty_species_definitions(mdlrPath) except OSError: print("Cannot open BNG2.pl. Please check bionetgen in mcellr.yaml") sys.exit(0) # append extended bng-xml to the bng-xml definition (the one that # doesn't include seed information) bngxmlestr = writeBXe.merge_bxbxe(namespace.input + '_total.xml', namespace.input + '_extended.xml') with open(mdlrPath + '_total.xml', 'w') as f: f.write(bngxmlestr) xmlspec = read_bngxml.parseFullXML(namespace.input + '.xml') # write out the equivalent plain mdl stuffs mdl_dict = write_mdl.construct_mcell(xmlspec, namespace.input, final_name.split(os.sep)[-1], nauty_dict) write_mdl.write_mdl(mdl_dict, final_name) def tokenize_seed_elements(self, seed): # extract species names seedKeys = re.findall( 'concentration="[0-9a-zA-Z_]+" name="[_0-9a-zA-Z@:(~!),.]+"', seed) seedKeys = [ re.sub( 'concentration="([0-9a-zA-Z_]+)" name="([_0-9a-zA-Z@:(~!),.]+)"', '\g<2>;\g<1>', x) for x in seedKeys ] seedList = seed.split('</Species>') seedList = [x.strip() for x in seedList if x != ''] seedList = [x + '</Species>' for x in seedList] seedList = [re.sub('"S[0-9]+"', "S1", x) for x in seedList] seedList = [ re.sub('concentration="[0-9a-zA-Z_]+"', 'concentration="1"', x) for x in seedList ] seedList = [ '<Model><ListOfSpecies>{0}</ListOfSpecies></Model>'.format(x) for x in seedList ] # seed_dict = {x:y for x, y in zip(seedKeys, seedList)} seed_dict = { x.split(';')[0]: y for x, y in zip(seedKeys, seedList) if x.split(';')[1] != '0' } # print '---', seed_dict.keys() return seed_dict def get_names_from_definition_string(self, defStr): species_names = re.findall('[0-9a-zA-Z_]+\(', defStr) return [x[:-1] for x in species_names] def xml2hnauty_species_definitions(self, inputMDLRFile): """ Temporary function for translating xml bng definitions to nauty species definition strings it call the nfsim library to get the list of possible complexes in the system, however the function right now returns the species in question + all molecule types (if we are sending a lone molecule tye as initialization it still returns all molecule types), which means the list requires filterinng. and the filtering is not pretty How to solve: make it so that nfsim returns a more sensible complex list (filtering out unrelated molecule types) or create a nauty label creation mechanism locally """ command = [ self.config['bionetgen'], '-xml', '-check', inputMDLRFile + '.bngl' ] output_dir = os.path.dirname(inputMDLRFile) if output_dir: command.extend(['--outdir', output_dir]) # get a bng-xml file call(command) # extract seed species defition seed, rest = split_bngxml.extractSeedBNG(inputMDLRFile + '.xml') # store xml with non-seed sections and load up nfsim library with open(namespace.input + '_total.xml', 'w') as f: f.write(rest) # load up nfsim library self.nfsim.init_nfsim(namespace.input + '_total.xml', 0) # remove encapsulating tags seed = seed[30:-30] # get the seed species definitions as a list seed_dict = self.tokenize_seed_elements(seed) nauty_dict = {} for seed in seed_dict: # initialize nfsim with each species definition and get back a # dirty list where one of the entries is the one we want # # XXX: i think i've solved it on the nfsim side, double check tmpList = self.get_nauty_string(seed_dict[seed]) # and now filter it out... # get species names from species definition string species_names = self.get_names_from_definition_string(seed) nauty_dict[seed] = [ x for x in tmpList if all(y in x for y in species_names) ][0] return nauty_dict def get_nauty_string(self, xmlSpeciesDefinition): self.nfsim.reset_system() self.nfsim.init_system_xml(xmlSpeciesDefinition) result = self.nfsim.querySystemStatus("complex") return result
class MDLR2MDL(object): ''' given an mdlr definition this script will get a set of mdl files compatible with an nfsim bng-xml definition ''' def __init__(self, configpath): self.config = {} self.config['bionetgen'] = os.path.join(configpath, 'bng2', 'BNG2.pl') self.config['mcell'] = os.path.join(configpath, 'mcell') self.config['libpath'] = os.path.join(configpath, 'lib') self.config['scriptpath'] = configpath prefix = "lib" if (sys.platform == 'linux') or (sys.platform == 'linux2'): extension = "so" elif (sys.platform == 'darwin'): extension = "dylib" elif (sys.platform == 'win32'): extension = "dll" else: raise Exception("Unexpected platform: {0}".format(sys.platform)) libNFsim_path = os.path.join(self.config['libpath'], '{0}NFsim.{1}'.format(prefix, extension)) libnfsim_c_path = os.path.join( self.config['libpath'], '{0}nfsim_c.{1}'.format(prefix, extension)) if not os.path.exists(libNFsim_path): # try the build directory paths libNFsim_path = os.path.join( self.config['libpath'], '..', 'nfsim', '{0}NFsim.{1}'.format(prefix, extension)) if not os.path.exists(libnfsim_c_path): # try the cygwin variant libnfsim_c_path = os.path.join( self.config['libpath'], '..', 'nfsimCInterface', '{0}nfsim_c.{1}'.format(prefix, extension)) print("Loading libs from " + libNFsim_path + " and " + libnfsim_c_path + ".") self.nfsim = NFSim(libnfsim_c_path, libNFsim_path=libNFsim_path) def process_mdlr(self, mdlrPath): ''' main method. extracts species definition, creates bng-xml, creates mdl definitions ''' try: nauty_dict = self.xml2hnauty_species_definitions(mdlrPath) except OSError: print( 'Cannot open BNG2.pl. Please check BioNetGen is installed at: %s' % (self.config['bionetgen'])) sys.exit(0) # append extended bng-xml to the bng-xml definition (the one that # doesn't include seed information) bngxmlestr = writeBXe.merge_bxbxe( namespace.input + '_rules.xml', namespace.input + '_extended_bng.xml') with open(mdlrPath + '_rules.xml', 'w') as f: f.write(bngxmlestr) xmlspec = read_bngxml.parseFullXML(namespace.input + '.xml') # write out the equivalent plain mdl stuffs mdl_dict = write_mdl.construct_mcell(xmlspec, namespace.input, final_name.split(os.sep)[-1], nauty_dict) write_mdl.write_mdl(mdl_dict, final_name) def tokenize_seed_elements(self, seed): # extract species names seedKeys = re.findall( 'concentration="[0-9a-zA-Z_]+" name="[_0-9a-zA-Z@:(~!),.]+"', seed) seedKeys = [ re.sub( 'concentration="([0-9a-zA-Z_]+)" name="([_0-9a-zA-Z@:(~!),.]+)"', '\g<2>;\g<1>', x) for x in seedKeys ] seedList = seed.split('</Species>') seedList = [x.strip() for x in seedList if x != ''] seedList = [x + '</Species>' for x in seedList] # Jose's code here seems to go overboard and also rename components named S[0-9]+ # The intent seems to be to rename Species id's from "S[0-9]+" to S1, with double quotes dropped # # seedList = [re.sub('"S[0-9]+"', "S1", x) for x in seedList] # # Attempt to fix this by parsing the full context of 'Species id=...' seedList = [ re.sub('Species id="S[0-9]+"', 'Species id=S1', x) for x in seedList ] seedList = [ re.sub('concentration="[0-9a-zA-Z_]+"', 'concentration="1"', x) for x in seedList ] seedList = [ '<Model><ListOfSpecies>{0}</ListOfSpecies></Model>'.format(x) for x in seedList ] # seed_dict = {x:y for x, y in zip(seedKeys, seedList)} seed_dict = { x.split(';')[0]: y for x, y in zip(seedKeys, seedList) if x.split(';')[1] != '0' } # print '---', seed_dict.keys() return seed_dict def get_names_from_definition_string(self, defStr): species_names = re.findall('[0-9a-zA-Z_]+\(', defStr) return [x[:-1] for x in species_names] def xml2hnauty_species_definitions(self, inputMDLRFile): """ Temporary function for translating xml bng definitions to nauty species definition strings it call the nfsim library to get the list of possible complexes in the system, however the function right now returns the species in question + all molecule types (if we are sending a lone molecule tye as initialization it still returns all molecule types), which means the list requires filtering. and the filtering is not pretty How to solve: make it so that nfsim returns a more sensible complex list (filtering out unrelated molecule types) or create a nauty label creation mechanism locally """ command = [ 'perl', self.config['bionetgen'], '-xml', '-check', inputMDLRFile + '.bngl' ] output_dir = os.path.dirname(inputMDLRFile) if output_dir: command.extend(['--outdir', output_dir]) # get a bng-xml file print("\n====> Running BioNetGen with explicit \"perl\": " + " ".join(command) + "\n") call(command) # extract seed species definition seed, rest = split_bngxml.extractSeedBNG(inputMDLRFile + '.xml') # store xml with non-seed sections and load up nfsim library print("\nStore xml with non-seed sections and load up nfsim library\n") with open(namespace.input + '_rules.xml', 'w') as f: f.write(rest) # load up nfsim library print("Initializing NFSim using: " + namespace.input + '_rules.xml') self.nfsim.init_nfsim(namespace.input + '_rules.xml', 0) # remove encapsulating tags seed = seed[30:-30] # get the seed species definitions as a list # print(">>>>>>>>>> THE SEED LIST: \n", str(seed)) seed_dict = self.tokenize_seed_elements(seed) nauty_dict = {} # print(">>>>>>>>>> SEED DICT: \n", seed_dict) for seed in seed_dict: # initialize nfsim with each species definition and get back a # dirty list where one of the entries is the one we want # # XXX: i think i've solved it on the nfsim side, double check tmpList = self.get_nauty_string(seed_dict[seed]) # print('>>>>>>>> SEED_DICT[SEED]: ' + str(seed_dict[seed])) # print('>>>>>>>> tmpList: ' + str(tmpList)) # and now filter it out... # get species names from species definition string species_names = self.get_names_from_definition_string(seed) nauty_dict[seed] = [ x for x in tmpList if all(y in x for y in species_names) ][0] return nauty_dict def get_nauty_string(self, xmlSpeciesDefinition): self.nfsim.reset_system() self.nfsim.init_system_xml(xmlSpeciesDefinition) result = self.nfsim.querySystemStatus("complex") return result