def macrostate_to_dnf(macrostate, simplify=True):
    """
    Rewrite a macrostate in disjunctive normal form (an OR of ANDs).

    Distributing AND over OR can blow up the number of terms
    exponentially. The conversion is still required when building
    Multistrand Macrostates, which can only be expressed as an OR of ANDs.
    Very little simplification is attempted, so the resulting expression
    may be inefficient or redundant; applying further logical identities
    (e.g. De Morgan's laws) is left as a future optimization.

    Arguments:
      macrostate -- the macrostate to convert. Its `type` is compared
        against Macrostate.types['conjunction'] / ['disjunction']; for
        those two types its `macrostates` operands are converted
        recursively.
      simplify -- if true, every AND with exactly one operand is replaced
        by that operand, and if only one term remains that term is
        returned directly instead of being wrapped in an OR.

    Returns a Macrostate. With simplify=False the result is always a
    disjunction whose operands are conjunctions.
    """
    from macrostate import Macrostate

    is_and = macrostate.type == Macrostate.types['conjunction']
    is_or = macrostate.type == Macrostate.types['disjunction']

    if is_and:
        # Convert every operand first, then multiply the DNFs together.
        operand_dnfs = [
            macrostate_to_dnf(operand, simplify=False)
            for operand in macrostate.macrostates
        ]
        terms = operand_dnfs[0].macrostates
        for operand_dnf in operand_dnfs[1:]:
            # (a1 OR a2 ...) AND (b1 OR b2 ...) becomes the OR of every
            # pairwise AND, with the operand lists merged into one AND.
            terms = [
                Macrostate(type='conjunction',
                           macrostates=left.macrostates + right.macrostates)
                for left, right in it.product(terms, operand_dnf.macrostates)
            ]
    elif is_or:
        # An OR of DNFs is just the concatenation of all their terms.
        operand_dnfs = [
            macrostate_to_dnf(operand, simplify=False)
            for operand in macrostate.macrostates
        ]
        terms = []
        for operand_dnf in operand_dnfs:
            terms += operand_dnf.macrostates
    else:
        # Any other macrostate type is a leaf: a single AND of itself.
        terms = [Macrostate(type='conjunction', macrostates=[macrostate])]

    if not simplify:
        return Macrostate(type='disjunction', macrostates=terms)

    # The most basic simplification: an AND with a single operand is
    # substituted by that operand (the list is updated in place).
    terms[:] = [
        term.macrostates[0] if len(term.macrostates) == 1 else term
        for term in terms
    ]

    # Likewise, an OR with a single operand is just that operand.
    if len(terms) == 1:
        return terms[0]
    return Macrostate(type='disjunction', macrostates=terms)
def count_by_complex_macrostate(complex, cutoff):
    """
    Build a 'count' macrostate for secondary structures that match the
    binding of the given complex to within the given cutoff.

    Arguments:
      complex -- the complex to match; its `name` is used in the
        macrostate's name.
      cutoff -- fractional defect allowed over the entire complex.

    Returns a Macrostate named "macrostate_<complex name>_<cutoff>".
    """
    from macrostate import Macrostate

    label = "macrostate_{}_{}".format(complex.name, cutoff)
    return Macrostate(name=label,
                      type="count",
                      complex=complex,
                      cutoff=cutoff)
def restingset_count_by_domain_macrostate(restingset, cutoff):
    """
    Build a macrostate that matches any complex of the given resting set,
    where each complex is matched domain by domain.

    One per-domain macrostate is created for every complex in
    `restingset.complexes` (via count_by_domain_macrostate) and the
    results are combined into a single disjunction.

    Arguments:
      restingset -- the resting set; its `name` is used in the
        macrostate's name.
      cutoff -- passed unchanged to count_by_domain_macrostate for each
        complex.

    NOTE: Multistrand may not support macrostates that are defined as a
    per-domain p-approximation.
    """
    from macrostate import Macrostate

    alternatives = []
    for member in restingset.complexes:
        alternatives.append(count_by_domain_macrostate(member, cutoff))

    label = "macrostate_{}".format(restingset.name)
    return Macrostate(name=label,
                      type="disjunction",
                      macrostates=alternatives)
def restingset_count_by_complex_macrostate(restingset, cutoff):
    """
    Build a macrostate for secondary structures that match the binding of
    any one of the complexes in the given resting set, to within the
    given cutoff.

    One 'count' macrostate is created per complex in
    `restingset.complexes` (via count_by_complex_macrostate) and the
    results are combined into a single disjunction.

    Arguments:
      restingset -- the resting set; its `name` is used in the
        macrostate's name.
      cutoff -- fractional defect allowed over the entire complex.
    """
    from macrostate import Macrostate

    alternatives = []
    for member in restingset.complexes:
        alternatives.append(count_by_complex_macrostate(member, cutoff))

    label = "macrostate_{}".format(restingset.name)
    return Macrostate(name=label,
                      type="disjunction",
                      macrostates=alternatives)
def count_by_domain_macrostate(complex, cutoff):
    """
    Build a Macrostate that matches a complex only if every one of its
    domains matches the complex's structure to within the cutoff.

    A loose macrostate is created for each domain of each strand (via
    loose_domain_macrostate), in strand order and then domain order, and
    all of them are combined into a single conjunction.

    Arguments:
      complex -- the complex to match; its `name` is used in the
        macrostate's name.
      cutoff -- fractional defect allowed over each domain.

    Returns a Macrostate named "<complex name>_loose-domains".
    """
    from macrostate import Macrostate

    per_domain = [
        loose_domain_macrostate(complex, strand_index, domain_index, cutoff)
        for strand_index, strand in enumerate(complex.strands)
        for domain_index in range(len(strand.domains))
    ]
    return Macrostate(name=complex.name + "_loose-domains",
                      type='conjunction',
                      macrostates=per_domain)
def loose_domain_macrostate(complex, strand_num, domain_num, cutoff):
    """
    Creates a loose macrostate with the given domain as the region of
    interest. The cutoff is a fractional defect within this domain. The
    results are not meaningful if the domain is not completely bound or
    completely unbound.

    Arguments:
      complex -- the complex containing the domain. Its `structure` must
        provide to_strandlist(), and its `strands` must each expose a
        `domains` sequence whose items have a `length`.
      strand_num -- index into complex.strands of the strand holding the
        domain.
      domain_num -- index of the domain within that strand's `domains`.
      cutoff -- fractional defect; it is scaled by the number of positions
        of interest and truncated to an int before being handed to the
        Macrostate.

    Returns a 'loose' Macrostate built around a new Complex that has the
    same strands, but whose structure is '?' everywhere except at the
    positions inside the domain and the positions bound to it.
    """
    from complex import Complex
    from macrostate import Macrostate

    # Work on per-strand copies; entries are overwritten in place below.
    strandlist = [
        strand_struct[:]
        for strand_struct in complex.structure.to_strandlist()
    ]
    strands = complex.strands

    # Locate the domain as a half-open index range within its strand.
    strand_domains = strands[strand_num].domains
    domain_start = sum(d.length for d in strand_domains[:domain_num])
    domain_end = domain_start + strand_domains[domain_num].length

    # n counts the positions of interest (those left unmasked).
    n = 0
    for strand_n, strand_struct in enumerate(strandlist):
        for i, bound in enumerate(strand_struct):
            is_domain = (strand_n == strand_num
                         and domain_start <= i < domain_end)
            # Each entry is None, the wildcard '?', or an indexable pair
            # read here as (strand index, position within that strand).
            is_bound_to_domain = (bound is not None and bound != '?'
                                  and bound[0] == strand_num
                                  and domain_start <= bound[1] < domain_end)
            if is_domain or is_bound_to_domain:
                n += 1
            else:
                # Outside the region of interest: mask with the wildcard.
                strand_struct[i] = '?'

    ms_complex = Complex(name=complex.name +
                         "_({0},{1})".format(strand_num, domain_num),
                         strands=strands,
                         structure=strandlist)

    # Turn the fractional cutoff into an absolute count over the n
    # positions of interest; int() truncates toward zero.
    return Macrostate(name="macrostate_" + ms_complex.name,
                      type='loose',
                      complex=ms_complex,
                      cutoff=int(cutoff * n))
def exact_complex_macrostate(complex):
    """
    Build an 'exact' macrostate for the given complex.

    Arguments:
      complex -- the complex to match; its `name` is used in the
        macrostate's name.

    Returns a Macrostate named "macrostate_<complex name>".
    """
    from macrostate import Macrostate

    label = "macrostate_" + complex.name
    return Macrostate(name=label, type='exact', complex=complex)