def test_complex_concentrations(self):
    """Kernel complexes keep the concentration mode, value and unit given after '@'."""
    # All inputs describe the same complex; only the concentration differs.
    kernel = ['a', ['b', ['c', ['+']], 'd']]
    cases = [
        ("cplx = a( b( c( + ) ) d ) @ constant 1e-7 M",
         ['constant', '1e-7', 'M']),
        ("cplx = a( b( c( + ) ) d ) @ constant 1e-4 mM",
         ['constant', '1e-4', 'mM']),
        ("cplx = a( b( c( + ) ) d ) @ constant 0.1 uM",
         ['constant', '0.1', 'uM']),
        ("cplx = a( b( c( + ) ) d ) @ initial 100 nM",
         ['initial', '100', 'nM']),
        ("cplx = a( b( c( + ) ) d ) @ initial 1e5 pM",
         ['initial', '1e5', 'pM']),
    ]
    for pil, concentration in cases:
        parsed = parse_pil_string(pil)
        self.assertEqual(
            parsed, [['kernel-complex', 'cplx', kernel, concentration]])
def test_strand_complex(self):
    """Strand-level complexes parse into name, strand list and raw structure."""
    iabc = [
        'strand-complex', 'IABC', ['I', 'A', 'B', 'C'],
        '(((( + ))))((((. + ))))((((. + ))))..... '
    ]
    cases = [
        # 'structure' keyword, strands separated by whitespace ...
        (" structure AB = A B : .(((+))) ",
         ['strand-complex', 'AB', ['A', 'B'], '.(((+))) ']),
        # ... or separated by '+'.
        (" structure AB = A + B : .(((+))) ",
         ['strand-complex', 'AB', ['A', 'B'], '.(((+))) ']),
        # 'complex' keyword.
        (" complex IABC : I A B C (((( + ))))((((. + ))))((((. + ))))..... ",
         iabc),
        (" complex I : I .... ",
         ['strand-complex', 'I', ['I'], '.... ']),
        # Same complex given as a triple-quoted string.
        (""" complex IABC : I A B C (((( + ))))((((. + ))))((((. + ))))..... """,
         iabc),
    ]
    for pil, expected in cases:
        parsed = parse_pil_string(pil)
        self.assertEqual(parsed, [expected])
def test_sl_domains(self):
    """Sequence-level domains parse with and without an explicit length."""
    cases = [
        (" sequence a1 = CTAGA : 6 ", ['sl-domain', 'a1', 'CTAGA', '6']),
        (" sequence a1 = CTAGA ", ['sl-domain', 'a1', 'CTAGA']),
        (" sequence 1 = CTA : 6 ", ['sl-domain', '1', 'CTA', '6']),
    ]
    for pil, expected in cases:
        parsed = parse_pil_string(pil)
        self.assertEqual(parsed, [expected])
def test_dl_domains(self):
    """Domain-level declarations parse to ['dl-domain', name, length]."""
    cases = [
        (" length a : short ", ['dl-domain', 'a', 'short']),
        (" domain a : 6 ", ['dl-domain', 'a', '6']),
        (" domain a : short ", ['dl-domain', 'a', 'short']),
        (" sequence a : 18 ", ['dl-domain', 'a', '18']),
    ]
    for pil, expected in cases:
        parsed = parse_pil_string(pil)
        self.assertEqual(parsed, [expected])
def test_not_implemented(self):
    """Complex definitions carrying extra arguments are rejected by the parser."""
    # there is currently no support of additional arguments
    # for complexes, e.g. this: [1nt]
    unsupported = (
        " structure [1nt] AB = A B : .(((+))) ",
        " structure [1nt] AB = A + B : ......((((((((((((((((((((((((((((((((((+)))))))))))))))))))))))))))))))))) ",
    )
    for pil in unsupported:
        with self.assertRaises(ParseException):
            parse_pil_string(pil)
def test_composite_domains(self):
    """Composite domains / strands parse to a name plus a list of sub-domains."""
    cases = [
        (" sup-sequence q = a b-seq z : 20 ",
         ['composite-domain', 'q', ['a', 'b-seq', 'z'], '20']),
        (" strand q = a b-seq z : 20 ",
         ['composite-domain', 'q', ['a', 'b-seq', 'z'], '20']),
        (" strand q = a b-seq z ",
         ['composite-domain', 'q', ['a', 'b-seq', 'z']]),
        (" strand I : y* b* x* a* ",
         ['composite-domain', 'I', ['y*', 'b*', 'x*', 'a*']]),
    ]
    for pil, expected in cases:
        parsed = parse_pil_string(pil)
        self.assertEqual(parsed, [expected])
def test_kernel_complex(self):
    """Kernel notation is parsed into nested lists, independent of spacing."""
    # The three differently spaced 'fuel2' inputs must yield the same tree.
    fuel2 = [
        'kernel-complex', 'fuel2',
        ['2b', ['3a', ['3b', ['3c', ['3d', ['4a', ['4b', '4c', '+']]]]]], '2a*']
    ]
    cases = [
        (" e10 = 2( 3 + 3( 4( + ) ) ) 1*( + ) 2 @ initial 0 nM",
         ['kernel-complex', 'e10',
          ['2', ['3', '+', '3', ['4', ['+']]], '1*', ['+'], '2'],
          ['initial', '0', 'nM']]),
        (" C = 1 2 3( ) + 4 ",
         ['kernel-complex', 'C', ['1', '2', '3', [], '+', '4']]),
        (" C = 1 2 3() + 4 ",
         ['kernel-complex', 'C', ['1', '2', '3', [], '+', '4']]),
        (" C = 1 2(3(+))",
         ['kernel-complex', 'C', ['1', '2', ['3', ['+']]]]),
        (" fuel2 = 2b( 3a( 3b( 3c( 3d( 4a( 4b 4c + ) ) ) ) ) ) 2a* ",
         fuel2),
        (" fuel2 = 2b( 3a( 3b( 3c( 3d( 4a( 4b 4c + )))))) 2a* ",
         fuel2),
        (" fuel2 = 2b(3a( 3b( 3c( 3d( 4a( 4b 4c + )))))) 2a* ",
         fuel2),
        (" fuel2 = 2b(3a( 3b( 3c( 3d( 4a( )))))) 2a* ",
         ['kernel-complex', 'fuel2',
          ['2b', ['3a', ['3b', ['3c', ['3d', ['4a', []]]]]], '2a*']]),
    ]
    for pil, expected in cases:
        parsed = parse_pil_string(pil)
        self.assertEqual(parsed, [expected])
def test_reactions(self):
    """Reactions parse to ['reaction', info, reactants, products]."""
    # Each case: (input, info, reactants, products). info is either [] or
    # [[type-or-name], [rate(, error)], [units]].
    cases = [
        (" kinetic 4 + C1 -> 7 ",
         [], ['4', 'C1'], ['7']),
        (" kinetic [ 876687.69 /M/s] I3 + SP -> C2 + Cat ",
         [[], ['876687.69'], ['/M/s']], ['I3', 'SP'], ['C2', 'Cat']),
        (" kinetic [ 1667015.4 /M/s] I3 + C1 -> W + Cat + OP ",
         [[], ['1667015.4'], ['/M/s']], ['I3', 'C1'], ['W', 'Cat', 'OP']),
        (" reaction [branch-3way = 0.733333 /s ] e71 -> e11 ",
         [['branch-3way'], ['0.733333'], ['/s']], ['e71'], ['e11']),
        (" reaction [bind21 = 4.5e+06 /M/s ] e4 + G1bot -> e13",
         [['bind21'], ['4.5e+06'], ['/M/s']], ['e4', 'G1bot'], ['e13']),
        (" reaction [bind21 = 4.5e+06 /nM/h ] e4 + G1bot -> e13",
         [['bind21'], ['4.5e+06'], ['/nM/h']], ['e4', 'G1bot'], ['e13']),
        ("reaction [k1 = 1.41076e+07 +/- 1.47099e+06 /M/s] A + B -> A_B",
         [['k1'], ['1.41076e+07', '1.47099e+06'], ['/M/s']],
         ['A', 'B'], ['A_B']),
        ("reaction [k1 = 1.41076e+07 +/- inf /M/s] A + B -> A_B",
         [['k1'], ['1.41076e+07', 'inf'], ['/M/s']],
         ['A', 'B'], ['A_B']),
    ]
    for pil, info, reactants, products in cases:
        parsed = parse_pil_string(pil)
        self.assertEqual(parsed, [['reaction', info, reactants, products]])
def test_broken_inputs(self):
    """Malformed PIL statements raise ParseException."""
    malformed = [
        # A digit in the sequence ...
        """ sequence a = CT1 : 6""",
        # A strand with no name ...
        """ strand = CT1 : 6""",
        # A resting state whose members are not enclosed in brackets.
        " state e4 = e4",
    ]
    for pil in malformed:
        with self.assertRaises(ParseException):
            parse_pil_string(pil)
def test_resting_macrostate(self):
    """'state' lines parse to a macrostate name and its member complexes."""
    cases = [
        (" state e4 = [e4]", ['e4']),
        (" state e4 = [e4, e5]", ['e4', 'e5']),
    ]
    for pil, members in cases:
        parsed = parse_pil_string(pil)
        self.assertEqual(parsed, [['resting-macrostate', 'e4', members]])
def read_pil(data, is_file = False, composite = False):
    """ Peppercorn standard input.

    Supports a variety of pil-style dialects, including kernel and enum.

    Args:
        data (str): Is either the PIL file in string format or the path to a
            file.
        is_file (bool): True if data is a path to a file, False otherwise
        composite (bool, optional): Returns an additional dictionary that maps
            names of composite domains (or strands) to a list of domains.

    Returns:
        tuple: (complexes, reactions), or (complexes, reactions, sequences) if
        composite is True. complexes maps names to PepperComplex objects,
        reactions is a list of PepperReaction objects and sequences maps
        composite-domain names (and their complements) to lists of domains.

    Raises:
        SystemExit: If the rate units of an input reaction differ from the
            rateunits attribute of the constructed PepperReaction.
        KeyError: If a composite domain or a strand-level complex refers to
            an undefined domain or strand.
        AssertionError: If a pre-existing complementary domain is not the
            complement of a newly specified domain, if a composite domain
            name ends with '*', or if a concentration is not a triple.
    """
    if is_file:
        parsed_file = parse_pil_file(data)
    else:
        parsed_file = parse_pil_string(data)

    domains = {'+' : '+'} # saves some code
    sequences = {}
    complexes = {}
    reactions = []

    def comp(dname):
        # Name of the complementary domain: toggle the trailing '*'.
        return dname[:-1] if dname[-1] == '*' else dname + '*'

    def register_complement(dname):
        # Make sure the complement of domains[dname] is known as well, and
        # that an already known complement is consistent with it.
        cname = dname[:-1] if domains[dname].is_complement else dname + '*'
        if cname in domains:
            assert domains[cname] == ~domains[dname]
        else:
            domains[cname] = ~domains[dname]

    for line in parsed_file:
        name = line[1]
        if line[0] == 'dl-domain':
            if line[2] == 'short':
                (dtype, dlen) = ('short', None)
            elif line[2] == 'long':
                (dtype, dlen) = ('long', None)
            else:
                (dtype, dlen) = (None, int(line[2]))
            # A repeated specification does not overwrite the first one.
            if name not in domains:
                domains[name] = PepperDomain(name, dtype = dtype, length = dlen)
            logging.info('Domain {} with length {}'.format(
                domains[name], len(domains[name])))
            register_complement(name)

        elif line[0] == 'sl-domain':
            logging.info(
                "Ignoring sequence information for domain {}.".format(name))
            if len(line) == 4:
                if int(line[3]) != len(line[2]):
                    # Only reported; the explicitly specified length wins.
                    logging.error(
                        "Sequence/Length information inconsistent {} vs {}.".format(
                            line[3], len(line[2])))
                domains[name] = PepperDomain(name, length = int(line[3]))
            else:
                domains[name] = PepperDomain(name, length = len(line[2]))
            domains[name].nucleotides = line[2]
            register_complement(name)

        elif line[0] == 'composite-domain':
            # This could be a strand definition or a composite domain.
            assert name[-1] != '*'
            # Real lists are required: they are concatenated and indexed
            # below, and a lazy map() would also delay the KeyError for an
            # undefined domain.
            sequences[name] = [domains[x] for x in line[2]]
            sequences[comp(name)] = [domains[comp(x)]
                                     for x in reversed(line[2])]

        elif line[0] == 'strand-complex':
            sequence = []
            for strand in line[2]:
                sequence += sequences[strand] + ['+']
            sequence = sequence[:-1]  # drop the trailing strand break
            structure = line[3].replace(' ','')
            complexes[name] = PepperComplex(
                sequence, list(structure), name=name)

        elif line[0] == 'kernel-complex':
            sequence, structure = resolve_loops(line[2])

            # Replace names with domain objects.
            try:
                sequence = [domains[d] for d in sequence]
            except KeyError:
                # At least one name is not a known domain: resolve the names
                # one by one, in place. Composite domains are expanded,
                # which grows both lists while they are being enumerated.
                for e, d in enumerate(sequence):
                    if isinstance(d, PepperDomain):
                        # Happens with composite domains, see next
                        # statement e+1
                        continue
                    if d in sequences:
                        for i, c in enumerate(sequences[d]):
                            assert c.name in domains
                            if i == 0:
                                sequence[e] = c
                            else:
                                # Every inserted domain inherits the
                                # structure character of the composite.
                                sequence.insert(e+i, c)
                                structure.insert(e+i, structure[e])
                    elif d not in domains:
                        logging.warning(
                            "Assuming {} is a long domain.".format(d))
                        domains[d] = PepperDomain(d, 'long')
                        cdom = ~domains[d]
                        domains[cdom.name] = cdom
                        sequence[e] = domains[d]
                    else:
                        sequence[e] = domains[d]

            complexes[name] = PepperComplex(sequence, structure, name=name)
            if len(line) > 3:
                assert len(line[3]) == 3
                complexes[name]._concentration = tuple(line[3])

        elif line[0] == 'reaction':
            reactants, products, rtype, rate, units, r = read_reaction(line)
            if r is None:
                continue

            try:
                # Evaluated eagerly, so that an undefined complex is
                # detected right here.
                reactants = [complexes[c] for c in reactants]
                products = [complexes[c] for c in products]
            except KeyError:
                logging.warning(
                    "Ignoring input reaction with undefined complex: {}".format(r))
                continue

            reaction = PepperReaction(
                reactants, products, rtype=rtype, rate=rate)
            if reaction.rateunits != units:
                logging.error(
                    "Rate units must be given in {}, not: {}.".format(
                        reaction.rateunits, units))
                raise SystemExit
            reactions.append(reaction)

        else:
            logging.warning(
                "Ignoring {} specification: {}".format(line[0], name))

    if composite:
        return complexes, reactions, sequences
    return complexes, reactions
def load_pil_crn(data):
    """ Input for pilsimulator

    Extracts species, resting macrostates and reactions from a PIL string.

    Args:
        data (str): The PIL specification in string format.

    Returns:
        tuple: (reactions, concentrations). reactions is a list of
        [reactants, products, [rate]] entries. concentrations maps names to
        ('initial', value) tuples: those of the species if the reactions are
        formulated in terms of species (detailed CRN), otherwise those of the
        resting macrostates (this includes the case without any reaction).

    Raises:
        PilFormatError: If concentration units or reaction rate units
            conflict with each other, or if reactions mix species and
            resting macrostates.
        NotImplementedError: If a concentration is not specified as initial.
        AssertionError: If a macrostate contains an unknown species, if the
            rate information of a reaction does not have three parts, or if
            a single reaction mixes macrostates and species.
    """
    parsed_file = parse_pil_string(data)

    sysunit = None  # concentration unit, fixed by the first one encountered
    species = dict()
    macrostates = dict()
    reactions = []
    for line in parsed_file:
        if line[0] == 'kernel-complex':
            name = line[1]
            conc = 0
            if len(line) > 3:
                init = line[3][0]
                conc = float(line[3][1])
                if sysunit is None:
                    sysunit = line[3][2]
                elif sysunit != line[3][2]:
                    raise PilFormatError(
                        'Conflicting units: {} vs. {}'.format(
                            sysunit, line[3][2]))
                if init[0] != 'i':
                    raise NotImplementedError(
                        'concentrations must be specified as *initial*')
            species[name] = ('initial', conc)

        elif line[0] == 'resting-macrostate':
            name = line[1]
            total = 0
            # The concentration of a macrostate is the sum over its species.
            for sp in line[2]:
                assert sp in species
                if species[sp][0][0] != 'i':
                    raise NotImplementedError(
                        'concentrations must be specified as *initial*')
                total += species[sp][1]
            macrostates[name] = ('initial', total)

        elif line[0] == 'reaction':
            info = line[1]
            reactants = line[2]
            products = line[3]
            assert len(info) == 3
            rate = float(info[1][0])
            # E.g. '/M/s' -> ['', 'M', 's'] -> ['M']: everything but the
            # trailing time unit has to match the concentration unit.
            # any() is used because a filter() object is always true on
            # Python 3, even if it yields nothing.
            rate_units = info[2][0].split('/')[1:-1]
            if sysunit and any(u != sysunit for u in rate_units):
                raise PilFormatError(
                    'Conflicting units: {} vs. {}'.format(sysunit, info[2][0]))
            reactions.append([reactants, products, [rate]])

        elif line[0] == 'dl-domain':
            pass

        else:
            print('# Ignoring Keyword: {}'.format(line[0]))

    # A CRN is condensed if its reactions are formulated in terms of resting
    # macrostates, and detailed otherwise. All reactions must agree.
    detailed = None
    for rxn in reactions:
        participants = rxn[0] + rxn[1]
        if any(p in macrostates for p in participants):
            assert all(p in macrostates for p in participants)
            d = False
        else:
            d = True

        if detailed is None:
            detailed = d
        elif detailed != d:
            raise PilFormatError('Need to provide either detailed or condensed CRN for simulation. Not both!')

    return reactions, (species if detailed else macrostates)
def read_pil(data, is_file = False, composite = False):
    """ Read PIL file format.

    Use dsdobjects parser to extract information. Load kinda.objects.

    Args:
        data (str): Is either the PIL file in string format or the path to a
            file.
        is_file (bool): True if data is a path to a file, False otherwise
        composite (bool, optional): Accepted, but not used by this function.

    Returns:
        tuple: (complexes, detailed reactions, resting sets, condensed
        reactions), each of them a list. For every ordered pair of resting
        sets, the condensed reactions additionally contain the reaction that
        maps the pair onto itself.

    Raises:
        PilFormatError: If a domain is specified without nucleotide sequence,
            if sequence and length of a domain are inconsistent, or if a
            kernel complex refers to an undefined domain.
        NotImplementedError: If a sequence is given for a complementary
            domain (name ending with '*').
        KeyError: If a composite domain, strand-level complex, resting set
            or reaction refers to an undefined name.
        AssertionError: If a reaction does not specify a reaction type.
    """
    if is_file:
        parsed_file = parse_pil_file(data)
    else:
        parsed_file = parse_pil_string(data)

    domains = {'+' : '+'} # saves some code
    strands = {}
    get_strand = {}  # tuple of domains -> strand object
    complexes = {}
    resting = {}
    con_reactions = []
    det_reactions = []
    for line in parsed_file:
        name = line[1]
        if line[0] == 'dl-domain':
            raise PilFormatError('KinDA needs nucleotide level information.')

        elif line[0] == 'sl-domain':
            if len(line) == 4:
                if int(line[3]) != len(line[2]):
                    raise PilFormatError(
                        "Sequence/Length information inconsistent {} vs {}.".format(
                            line[3], len(line[2])))

            # The result is not needed; the call is kept in case the
            # constructor validates the sequence -- TODO confirm.
            dna.Sequence(line[2])
            if name[-1] == '*':
                # This will be possible, sooner or later. But we have to make
                # sure the kinda.objects can handle it.
                raise NotImplementedError
            else:
                dom = dna.Domain(name = name, sequence = line[2])
                domains[dom.name] = dom
                domains[dom.complement.name] = dom.complement

        elif line[0] == 'composite-domain':
            # A real list, because it is consumed three times below.
            domain_list = [domains[x] for x in line[2]]
            d = dna.Domain(name = name, subdomains = domain_list)
            domains[d.name] = d
            domains[d.complement.name] = d.complement
            # if it is a strand...
            s = dna.Strand(name = name, domains = domain_list)
            strands[s.name] = s
            strands[s.complement.name] = s.complement
            get_strand[tuple(domain_list)] = s

        elif line[0] == 'strand-complex':
            strand_list = [strands[x] for x in line[2]]
            structure = line[3].replace(' ','')
            cplx = dna.Complex(name = name,
                    strands = strand_list, structure = structure)
            complexes[cplx.name] = cplx

        elif line[0] == 'kernel-complex':
            sequence, structure = resolve_loops(line[2])

            # Replace names with domain objects.
            try:
                sequence = [domains[dname] for dname in sequence]
            except KeyError as err:
                # The missing key is the first argument of the KeyError.
                raise PilFormatError(
                    "Cannot find domain: {}.".format(err.args[0]))

            # Split the domain sequence into strands at every strand break.
            # The extra '+' at the end terminates the last strand.
            current = []
            strand_list = []
            for elem in sequence + ['+']:
                if isinstance(elem, dna.Domain):
                    current.append(elem)
                else:
                    # Strands are shared between complexes; an unknown strand
                    # is named after its domains.
                    if tuple(current) not in get_strand:
                        sname = '_'.join(map(str, current))
                        s = dna.Strand(name = sname, domains = current)
                        strands[s.name] = s
                        strands[s.complement.name] = s.complement
                        get_strand[tuple(current)] = s
                    strand_list.append(get_strand[tuple(current)])
                    current = []

            cplx = dna.Complex(name = name,
                    strands = strand_list, structure = ''.join(structure))
            complexes[cplx.name] = cplx

        elif line[0] == 'resting-macrostate':
            cplxs = [complexes[c] for c in line[2]]
            resting[name] = dna.RestingSet(name = name, complexes = cplxs)

        elif line[0] == 'reaction':
            rtype = line[1][0][0] if line[1] != [] and line[1][0] != [] else None
            assert rtype is not None
            if rtype == 'condensed':
                reactants = [resting[c] for c in line[2]]
                products = [resting[c] for c in line[3]]
                con_reactions.append(dna.RestingSetReaction(
                    reactants = reactants, products = products))
            else:
                reactants = [complexes[c] for c in line[2]]
                products = [complexes[c] for c in line[3]]
                det_reactions.append(dna.Reaction(
                    reactants = reactants, products = products))

        else:
            print('# Ignoring keyword: {}'.format(line[0]))

    # Make sure the reverse reaction between every pair of reactants is
    # included. These unproductive reactions will be important stop states
    # for Multistrand simulations.
    reactant_pairs = it.product(resting.values(), resting.values())
    for reactants in reactant_pairs:
        con_reactions.append(dna.RestingSetReaction(
            reactants = reactants, products = reactants))

    # list(): dict.values() is only a view on Python 3.
    return (list(complexes.values()), det_reactions,
            list(resting.values()), con_reactions)