def test_peppercorn_interface(self):
    """Make sure the peppercorn.utils interface did not change.

    Builds two domains (plus complements), three strands and two complexes
    directly from peppercorn objects, enumerates them, and checks both the
    detailed and the condensed reaction networks against hard-coded kernel
    strings.
    """
    t0 = peputils.Domain('t0', 5, sequence='H' * 5)
    t0_ = peputils.Domain('t0', 5, sequence='D' * 5, is_complement=True)
    d1 = peputils.Domain('d1', 15, sequence='H' * 15)
    d1_ = peputils.Domain('d1', 15, sequence='D' * 15, is_complement=True)
    domains = [t0, t0_, d1, d1_]

    s0 = peputils.Strand('s0', [t0, d1])
    s1 = peputils.Strand('s1', [d1])
    s2 = peputils.Strand('s2', [d1_, t0_])
    strands = [s0, s1, s2]

    c1s = peputils.parse_dot_paren('..')
    c1 = peputils.Complex('c1', [s0], c1s)
    c1.check_structure()

    c2s = peputils.parse_dot_paren('(+).')
    c2 = peputils.Complex('c2', [s1, s2], c2s)
    c2.check_structure()
    complexes = [c1, c2]

    enum = Enumerator(domains, strands, complexes)
    enum.enumerate()

    ###########################
    # Get full output CRN
    reactions = enum.reactions
    self.assertEqual(len(reactions), 3)

    expected_kernels = [
        't0 d1 + d1( + ) t0* -> t0( d1 + d1( + ) )',
        't0( d1 + d1( + ) ) -> t0( d1( + ) ) + d1',
        't0( d1 + d1( + ) ) -> t0 d1 + d1( + ) t0*',
    ]

    # Running totals of strand names seen on either side of the reactions;
    # every reaction must conserve strands, so the totals stay equal.
    reactant_strands = Counter()
    product_strands = Counter()
    for rxn in sorted(reactions):
        for cx in rxn.reactants:
            reactant_strands += Counter(map(str, cx.strands))
        for cx in rxn.products:
            product_strands += Counter(map(str, cx.strands))
        self.assertEqual(reactant_strands, product_strands)
        # assertIn reports the offending kernel string on failure.
        self.assertIn(rxn.kernel_string(), expected_kernels)

    ###########################
    # Get condensed output CRN
    condensed = condense_resting_states(enum, compute_rates=True, k_fast=0.)
    reactions = condensed['reactions']
    self.assertEqual(len(reactions), 1)

    result_kernel = 't0 d1 + d1( + ) t0* -> t0( d1( + ) ) + d1'
    self.assertEqual(reactions[0].kernel_string(), result_kernel)
def testtube_to_enumerator(self, testtube):  # Does not add reactions!
    """Initialize the peppercorn enumerator object.

    The domains, strands and complexes of *testtube* are translated into
    their peppercorn counterparts. As a side effect, every translated
    complex is recorded in two mappings on ``self``, both keyed by the
    complex name: ``self._enumN_to_ttubeO`` stores the original testtube
    complex and ``self._ttubeN_to_enumO`` stores the new peppercorn complex.

    Args:
      testtube <nuskell.objects.TestTube()>: complexes, strands and domains

    Returns:
      Enumerator <peppercorn.enumerator.Enumerator()>
    """
    # Translate to peppercorn domains. A trailing '*' on the testtube name
    # marks a complement; peppercorn takes the bare name plus a flag.
    domains = {}
    for name, dom in testtube.domains.items():
        sequence = ''.join(dom.sequence)
        if name.endswith('*'):
            domains[name] = peputils.Domain(
                name[:-1], dom.length, sequence=sequence, is_complement=True)
        else:
            domains[name] = peputils.Domain(
                name, dom.length, sequence=sequence)

    # Translate to peppercorn strands. dom_to_strand lets the complex
    # translation below find a strand name from its domain-name sequence.
    strands = {}
    dom_to_strand = {}
    for name, strand in testtube.strands.items():
        dom_names = tuple(map(str, strand))
        dom_to_strand[dom_names] = name
        strands[name] = peputils.Strand(
            name, [domains[d] for d in dom_names])

    # Translate to peppercorn complexes
    complexes = {}
    for cplx in testtube.complexes:
        cplx_strands = [
            strands[dom_to_strand[tuple(map(str, s))]]
            for s in cplx.lol_sequence
        ]
        complex_structure = peputils.parse_dot_paren(''.join(cplx.structure))
        enum_complex = peputils.Complex(
            cplx.name, cplx_strands, complex_structure)
        enum_complex.check_structure()

        complexes[cplx.name] = enum_complex
        self._enumN_to_ttubeO[cplx.name] = cplx
        self._ttubeN_to_enumO[cplx.name] = enum_complex

    # list() so the enumerator receives real lists on any Python version.
    return Enumerator(list(domains.values()),
                      list(strands.values()),
                      list(complexes.values()))
def auto_strand(doms, strands, structures_to_strands):
    """
    Return the strand made of exactly these domains, creating it if needed.

    `structures_to_strands` maps tuples of domains to strand objects and is
    consulted first. On a miss, a new `utils.Strand` is built, named after
    the underscore-joined domain names, and registered in both `strands`
    (by name) and `structures_to_strands` (by domain tuple).
    """
    key = tuple(doms)

    # A strand with this exact domain sequence already exists: reuse it.
    if key in structures_to_strands:
        return structures_to_strands[key]

    base_name = "_".join(d.name for d in doms)
    chosen_name = base_name

    # The obvious name belongs to a strand with a different structure, so
    # append the first numeric suffix (starting at 2) that is still free.
    if base_name in strands:
        template = base_name + "_%d"
        suffix = 2
        while (template % suffix) in strands:
            suffix += 1
        chosen_name = template % suffix
        warning(
            "Auto-generated strand name %s already taken by a strand with different structure. Auto-generated strand will be named %s."
            % (base_name, chosen_name))

    # Build the strand and register it under both lookup keys.
    new_strand = utils.Strand(chosen_name, doms)
    strands[chosen_name] = new_strand
    structures_to_strands[key] = new_strand
    return new_strand
def _load_saved_complex(saved_complex, strands):
    """Rebuild one utils.Complex from its JSON record.

    JSON has no tuples, so every non-null entry of the saved structure is
    converted back from a list into a tuple; null entries stay None.
    """
    c_strands = [strands[name] for name in saved_complex['strands']]
    structure = [
        [None if pair is None else tuple(pair) for pair in strand_struct]
        for strand_struct in saved_complex['structure']
    ]
    return utils.Complex(saved_complex['name'], c_strands, structure)


def load_json(filename):
    """
    Loads a saved enumerator from a JSON output file at filename.

    The file must contain the keys 'domains', 'strands',
    'resting_complexes', 'transient_complexes', 'reactions',
    'resting_states' and 'initial_complexes'; a missing key raises
    KeyError. The returned Enumerator is constructed from the initial
    complexes and then has its private result attributes (_complexes,
    _resting_states, _transient_complexes, _resting_complexes, _reactions)
    filled in from the file, so no enumeration is run here.
    """
    # Read everything up front so the file is closed even if the
    # reconstruction below fails.
    with open(filename, 'r') as fin:
        saved = json.load(fin)

    domains = {}
    for saved_domain in saved['domains']:
        name = saved_domain['name']
        is_complement = saved_domain['is_complement']
        if is_complement:
            # Drop the last character of a saved complement name before
            # handing it to utils.Domain (presumably the trailing '*').
            name = name[:-1]
        new_dom = utils.Domain(name, saved_domain['length'],
                               is_complement=is_complement,
                               sequence=saved_domain.get('sequence'))
        domains[new_dom.name] = new_dom

    strands = {}
    for saved_strand in saved['strands']:
        doms = [domains[domain] for domain in saved_strand['domains']]
        strands[saved_strand['name']] = utils.Strand(saved_strand['name'],
                                                     doms)

    # `complexes` collects resting and transient complexes together so that
    # reactions and resting states can look them up by name.
    complexes = {}

    resting_complexes = {}
    for saved_complex in saved['resting_complexes']:
        new_complex = _load_saved_complex(saved_complex, strands)
        resting_complexes[saved_complex['name']] = new_complex
        complexes[saved_complex['name']] = new_complex

    transient_complexes = {}
    for saved_complex in saved['transient_complexes']:
        new_complex = _load_saved_complex(saved_complex, strands)
        transient_complexes[saved_complex['name']] = new_complex
        complexes[saved_complex['name']] = new_complex

    reactions = []
    for saved_reaction in saved['reactions']:
        reactants = [complexes[name] for name in saved_reaction['reactants']]
        products = [complexes[name] for name in saved_reaction['products']]
        reactions.append(
            ReactionPathway(saved_reaction['name'], reactants, products))

    resting_states = []
    for resting_state in saved['resting_states']:
        comps = [complexes[name] for name in resting_state['complexes']]
        resting_states.append(
            utils.RestingState(resting_state['name'], comps))

    # Initial complexes are rebuilt as separate objects; they are not
    # shared with the entries of `complexes`.
    initial_complexes = {}
    for saved_complex in saved['initial_complexes']:
        initial_complexes[saved_complex['name']] = _load_saved_complex(
            saved_complex, strands)

    enumerator = peppercornenumerator.Enumerator(
        list(domains.values()),
        list(strands.values()),
        list(initial_complexes.values()))
    enumerator._complexes = list(complexes.values())
    enumerator._resting_states = resting_states
    enumerator._transient_complexes = list(transient_complexes.values())
    enumerator._resting_complexes = list(resting_complexes.values())
    enumerator._reactions = reactions
    return enumerator
def _pil_syntax_error(line, line_counter):
    """Log an invalid-syntax error for `line` and abort parsing."""
    logging.error("Invalid syntax on input line %d" % line_counter)
    logging.error(line)
    raise Exception()


def _pil_check_new_domain(domains, domain_name, line_counter):
    """Reject a duplicate domain name; warn about an unusual one."""
    if domain_name in domains:
        logging.error(
            "Duplicate domain name encountered in input line %d" %
            line_counter)
        raise Exception()
    if not re.match(r'[\w-]+$', domain_name):
        logging.warning(
            "Non-alphanumeric domain name %s encountered in input line %d" %
            (domain_name, line_counter))


def _pil_add_domain_pair(domains, name, length, sequence):
    """Register domain `name` and its complement under `name` and `name*`."""
    new_dom = utils.Domain(name, length, sequence=sequence)
    new_dom_comp = utils.Domain(name, length, sequence=sequence,
                                is_complement=True)
    domains[name] = new_dom
    domains["%s*" % name] = new_dom_comp


def _pil_length(line, line_counter, domains):
    """Handle `length a = 6`: a domain of given length, sequence all 'N'."""
    parts = re.match(r"length\s*([\w-]+)\s*=\s*(\d+)\s*", line)
    if parts is None:
        _pil_syntax_error(line, line_counter)

    domain_name, domain_length = parts.groups()
    _pil_check_new_domain(domains, domain_name, line_counter)

    domain_length = int(domain_length)
    _pil_add_domain_pair(domains, domain_name, domain_length,
                         "N" * domain_length)


def _pil_sequence(line, line_counter, domains):
    """Handle `sequence a = NNNNN : 5`: a domain with an explicit sequence."""
    parts = re.match(
        r"sequence\s*([\w-]+)\s*=\s*(\w+)\s*:?\s*(\d?)\s*", line)
    if parts is None:
        _pil_syntax_error(line, line_counter)

    # The optional trailing length is parsed but not used; the domain
    # length is always taken from the sequence itself.
    domain_name, domain_sequence, _ = parts.groups()
    _pil_check_new_domain(domains, domain_name, line_counter)

    _pil_add_domain_pair(domains, domain_name, len(domain_sequence),
                         domain_sequence)


def _pil_sup_sequence(line, line_counter, domains):
    """Handle `sup-sequence a = b c d : 6`: concatenate existing domains."""
    # The length group accepts any number of digits, optionally preceded by
    # whitespace, so that declarations such as ": 16" are actually checked.
    parts = re.match(
        r"sup-sequence\s*([\w-]+)\s*=\s*((?:[\w-]+\s*)+):?\s*(\d*)", line)
    if parts is None:
        _pil_syntax_error(line, line_counter)

    domain_name, sequence_names, length = parts.groups()
    _pil_check_new_domain(domains, domain_name, line_counter)

    # Every sub-domain must already be defined; collect their sequences.
    pieces = []
    for sequence_name in sequence_names.split():
        if sequence_name not in domains:
            logging.error(
                "Unknown domain name '%s' in super-sequence on input line %d"
                % (sequence_name, line_counter))
            logging.error(line)
            raise Exception()
        pieces.append(domains[sequence_name].sequence)
    sequence = "".join(pieces)

    # If a length was declared it must agree with the assembled sequence.
    if length and int(length) != len(sequence):
        logging.error(
            "Sequence length for super-sequence %s is %d, not equal to expected value %d on input line %d"
            % (domain_name, len(sequence), int(length), line_counter))
        raise Exception()

    _pil_add_domain_pair(domains, domain_name, len(sequence), sequence)


def _pil_equal(line, line_counter, domains):
    """Handle `equal a b c`: define b, c, ... as copies of domain a."""
    parts = line.split()
    if len(parts) < 3:
        logging.error(
            "'equal' statement does not specify at least 2 domains on input line %d"
            % line_counter)
        logging.error(line)
        raise Exception()

    source_domain_name = parts[1]
    if source_domain_name not in domains:
        logging.error(
            "Unknown domain name '%s' in 'equals' statement on input line %d"
            % (source_domain_name, line_counter))
        logging.error(line)
        raise Exception()

    source_domain = domains[source_domain_name]
    for target_domain_name in parts[2:]:
        _pil_add_domain_pair(domains, target_domain_name,
                             len(source_domain), source_domain.sequence)


def _pil_strand(line, line_counter, domains, strands):
    """Handle `strand A = a x b* ...`: a strand of known domains."""
    parts = re.match(
        r"strand\s*([\w-]+)\s*=\s*((?:[\w*-]+\s*)+):?(\d?)", line)
    if parts is None:
        _pil_syntax_error(line, line_counter)

    # The optional trailing length is parsed but not used.
    strand_name, strand_dom_names, _ = parts.groups()

    if strand_name in strands:
        logging.error(
            "Duplicate strand name encountered in input line %d" %
            line_counter)
        raise Exception()
    if not re.match(r'\w+$', strand_name):
        logging.warning(
            "Non-alphanumeric strand name %s encountered in input line %d" %
            (strand_name, line_counter))

    strand_doms = []
    for domain_name in strand_dom_names.split():
        if domain_name not in domains:
            logging.error(
                "Invalid domain name %s encountered in input line %d" %
                (domain_name, line_counter))
            logging.error(line)
            # Report what is defined through logging instead of a stray
            # print to stdout.
            logging.error("Known domains: %s" % ", ".join(sorted(domains)))
            raise Exception()
        strand_doms.append(domains[domain_name])

    if len(strand_doms) == 0:
        logging.warning(
            "0-length strand encountered in input line %d" % line_counter)

    strands[strand_name] = utils.Strand(strand_name, strand_doms)


def _pil_structure(line, line_counter, strands, complexes):
    """Handle `structure [params] A = s1 + s2 : ((+))`: define a complex."""
    # The bracketed parameter group is optional, so this one pattern covers
    # both `structure [ 1nt ] A = ...` and `structure A = ...`.
    parts = re.match(
        r"structure\s+(\[[^\]]+\])?\s*([\w-]+)\s*=\s*((?:[\w-]+\s*\+?\s*)+):\s*([().+\s]+)",
        line)
    if parts is None:
        _pil_syntax_error(line, line_counter)

    parameters, complex_name, strands_line, structure_line = parts.groups()
    if parameters is None:
        parameters = ""
    params = utils.parse_parameters(parameters)

    if complex_name in complexes:
        logging.error(
            "Duplicate complex name encountered in input line %d" %
            line_counter)
        raise Exception()
    if not re.match(r'\w+$', complex_name):
        logging.warning(
            "Non-alphanumeric complex name %s encountered in input line %d" %
            (complex_name, line_counter))

    # Strand names may be separated by optional stand-alone '+' tokens.
    complex_strands = []
    for strand_name in strands_line.split():
        if strand_name == "+":
            continue
        if strand_name not in strands:
            logging.error(
                "Invalid strand name %s encountered in input line %d" %
                (strand_name, line_counter))
            raise Exception()
        complex_strands.append(strands[strand_name])

    # First interpret the dot-paren string with one symbol per domain. If
    # it is longer than the number of domains, guess that it is written
    # per base and re-parse it accordingly.
    complex_structure = utils.parse_dot_paren(structure_line)
    struct_length = sum(map(len, complex_structure))
    domains_length = sum(map(len, complex_strands))

    if struct_length > domains_length:
        complex_structure = utils.parse_basewise_dot_paren(
            structure_line, complex_strands)
        struct_length = sum(map(len, complex_structure))

    if struct_length != domains_length:
        logging.error(
            "Complex %(name)s has %(doms)d domains but structure size %(struct_length)d. (structure was '%(struct)s')"
            % {
                "name": complex_name,
                "doms": domains_length,
                "struct_length": struct_length,
                "struct": structure_line
            })
        raise Exception()

    new_complex = utils.Complex(complex_name, complex_strands,
                                complex_structure)
    new_complex.check_structure()

    if params['concentration'] is not None:
        new_complex.concentration = params['concentration']

    complexes[complex_name] = new_complex


def input_pil(filename):
    """
    Initializes and returns an enumerator from an input file in the Pepper
    Intermediate Language (PIL).

    The file is read line by line. Blank lines, `#` comments and `kinetic`
    statements are skipped; `length`, `sequence`, `sup-sequence` and `equal`
    statements define domains (each together with its `*` complement);
    `strand` and `structure` statements define strands and complexes. Any
    other line is passed to resolve_kernel() as a Kernel statement.

    Every problem is reported through logging.error and then raised as a
    bare Exception. The input file is closed in all cases.
    """
    domains = {}
    strands = {}
    complexes = {}

    # maps domain-wise strand structures to auto-generated strand names
    structures_to_strands = {}

    with open(filename, 'r') as fin:
        for (line_counter, line) in enumerate(fin, start=1):
            line = line.strip()

            if line == "" or line.startswith("#"):
                continue
            elif line.startswith("length"):
                _pil_length(line, line_counter, domains)
            elif line.startswith("sequence"):
                _pil_sequence(line, line_counter, domains)
            elif line.startswith("sup-sequence"):
                _pil_sup_sequence(line, line_counter, domains)
            elif line.startswith("equal"):
                _pil_equal(line, line_counter, domains)
            elif line.startswith("strand"):
                _pil_strand(line, line_counter, domains, strands)
            elif line.startswith("structure"):
                _pil_structure(line, line_counter, strands, complexes)
            elif line.startswith("kinetic"):
                continue
            else:
                try:
                    resolve_kernel([line], domains, strands,
                                   structures_to_strands, complexes)
                except Exception as e:
                    logging.error(
                        "Unexpected characters encountered in input line %d; tried to parse as Kernel statement but got error: %s"
                        % (line_counter, str(e)))
                    raise Exception()

    enumerator = peppercornenumerator.Enumerator(
        list(domains.values()),
        list(strands.values()),
        list(complexes.values()))
    return enumerator
def _enum_short_line(line_counter):
    """Log an invalid-syntax error for a statement with too few fields."""
    logging.error("Invalid syntax on input line %d" % line_counter)
    raise Exception()


def _enum_domain(parts, line_counter, domains):
    """Handle `domain a : 6 [SEQUENCE]`: define a domain and its complement."""
    if len(parts) < 4:
        _enum_short_line(line_counter)

    domain_name = parts[1]
    if domain_name in domains:
        logging.error(
            "Duplicate domain name encountered in input line %d" %
            line_counter)
        raise Exception()
    if not re.match(r'\w+$', domain_name):
        logging.warning(
            "Non-alphanumeric domain name %s encountered in input line %d" %
            (domain_name, line_counter))

    # The length is either the keyword 'short' / 'long' (kept as a string)
    # or an exact integer.
    domain_length = parts[3]
    if domain_length not in ('short', 'long'):
        domain_length = int(domain_length)
        if domain_length <= 0:
            logging.warning("Domain of length %d found in input line %d" %
                            (domain_length, line_counter))

    # An optional fifth field gives the sequence.
    domain_sequence = parts[4] if len(parts) > 4 else None

    new_dom = utils.Domain(domain_name, domain_length,
                           sequence=domain_sequence)
    new_dom_comp = utils.Domain(domain_name, domain_length,
                                sequence=domain_sequence,
                                is_complement=True)
    domains[domain_name] = new_dom
    domains["%s*" % domain_name] = new_dom_comp


def _enum_strand(parts, line_counter, domains, strands):
    """Handle `strand A : a x b* ...`: define a strand of known domains."""
    if len(parts) < 2:
        _enum_short_line(line_counter)

    strand_name = parts[1]
    if strand_name in strands:
        logging.error(
            "Duplicate strand name encountered in input line %d" %
            line_counter)
        raise Exception()
    if not re.match(r'\w+$', strand_name):
        logging.warning(
            "Non-alphanumeric strand name %s encountered in input line %d" %
            (strand_name, line_counter))

    # Fields after the name and the ':' separator are domain names.
    strand_doms = []
    for domain_name in parts[3:]:
        if domain_name not in domains:
            logging.error(
                "Invalid domain name %s encountered in input line %d" %
                (domain_name, line_counter))
            raise Exception()
        strand_doms.append(domains[domain_name])

    if len(strand_doms) == 0:
        logging.warning(
            "0-length strand encountered in input line %d" % line_counter)

    strands[strand_name] = utils.Strand(strand_name, strand_doms)


def _enum_complex(parts, line_counter, numbered_lines, strands, complexes):
    """Handle a three-line `complex A :` block: header, strands, structure.

    The strands line and the structure line are pulled from
    `numbered_lines`; a block cut short by end-of-file is read as empty
    lines.
    """
    if len(parts) < 2:
        _enum_short_line(line_counter)

    complex_name = parts[1]
    if complex_name in complexes:
        logging.error(
            "Duplicate complex name encountered in input line %d" %
            line_counter)
        raise Exception()
    if not re.match(r'\w+$', complex_name):
        logging.warning(
            "Non-alphanumeric complex name %s encountered in input line %d" %
            (complex_name, line_counter))

    # Second line of the block: whitespace-separated strand names.
    line_counter, strands_line = next(numbered_lines, (line_counter, ""))
    complex_strands = []
    for strand_name in strands_line.strip().split():
        if strand_name not in strands:
            logging.error(
                "Invalid strand name %s encountered in input line %d" %
                (strand_name, line_counter))
            raise Exception()
        complex_strands.append(strands[strand_name])

    # Third line of the block: the dot-paren structure.
    line_counter, structure_line = next(numbered_lines, (line_counter, ""))
    structure_line = structure_line.strip()
    complex_structure = utils.parse_dot_paren(structure_line)

    struct_length = sum(map(len, complex_structure))
    domains_length = sum(map(len, complex_strands))
    if struct_length != domains_length:
        logging.error(
            "Complex %(name)s has %(doms)d domains but structure size %(struct_length)d. (structure was %(struct)s)"
            % {
                "name": complex_name,
                "doms": domains_length,
                "struct_length": struct_length,
                "struct": structure_line
            })
        raise Exception()

    new_complex = utils.Complex(complex_name, complex_strands,
                                complex_structure)
    new_complex.check_structure()
    complexes[complex_name] = new_complex


def input_enum(filename):
    """
    Initializes and returns an enumerator from a standard input file.

    Recognised statements are `domain`, `strand` and `complex` (the latter
    spans three lines). Blank lines and `#` comments are skipped; anything
    else is an error. Problems are reported through logging.error and then
    raised as a bare Exception. Line numbers in messages count every
    physical line of the file, including blank lines, comments and the
    continuation lines of a `complex` block. The input file is closed in
    all cases.
    """
    domains = {}
    strands = {}
    complexes = {}

    with open(filename, 'r') as fin:
        # A single shared iterator, so the `complex` handler can consume
        # its two continuation lines and keep the numbering in step.
        numbered_lines = enumerate(fin, start=1)
        for line_counter, line in numbered_lines:
            line = line.strip()

            if line == "" or line.startswith("#"):
                continue

            parts = line.split()
            if line.startswith("domain"):
                _enum_domain(parts, line_counter, domains)
            elif line.startswith("strand"):
                _enum_strand(parts, line_counter, domains, strands)
            elif line.startswith("complex"):
                _enum_complex(parts, line_counter, numbered_lines,
                              strands, complexes)
            else:
                logging.error(
                    "Unexpected characters encountered in input line %d" %
                    line_counter)
                raise Exception()

    enumerator = peppercornenumerator.Enumerator(
        list(domains.values()),
        list(strands.values()),
        list(complexes.values()))
    return enumerator