def products(self):
    """Return the amplicons formed by every forward/reverse primer pair.

    A previously computed, non-empty ``self._products`` list is returned
    unchanged.  Otherwise the list is rebuilt from scratch: for each
    combination of a primer in ``self.fwd_primers`` and a primer in
    ``self.rev_primers`` the template region spanned by the two
    footprints is cut out, flanked by the primer tails, annotated with
    one feature per primer and wrapped in an ``Amplicon``.

    Note that the cache check is a truthiness test, so an empty result
    is recomputed on every call.

    Returns:
        The list stored in ``self._products``.
    """
    if self._products:
        return self._products

    self._products = []

    for fwd in self.fwd_primers:
        for rev in self.rev_primers:
            template = self.template
            fwd_foot = len(fwd.footprint)
            rev_foot = len(rev.footprint)

            if not template.circular:
                # Linear template: plain slice from the start of the
                # forward footprint to the end of the reverse footprint.
                region = template[fwd.position - fwd_foot:
                                  rev.position + rev_foot]
            elif fwd.position > rev.position:
                # The product spans the origin of the circular template:
                # rotate the template to the start of the forward
                # footprint, multiply it by two and keep the product
                # length from the beginning.
                rotated = template.shifted(fwd.position - fwd_foot)
                doubled = rotated._multiply_circular(2)
                span = (len(template)
                        - (fwd.position - rev.position)
                        + rev_foot
                        + fwd_foot)
                region = doubled[:span]
            else:
                # Circular template, product does not span the origin:
                # slice out of a three-fold multiplied template, with both
                # coordinates offset by one template length.
                tripled = template._multiply_circular(3)
                offset = len(template)
                region = tripled[fwd.position - fwd_foot + offset:
                                 rev.position + rev_foot + offset]

            left_tail = Dseqrecord(fwd.tail)
            right_tail = Dseqrecord(rev.tail).reverse_complement()
            prd = left_tail + region + right_tail

            # One feature per primer, at either end of the product.
            prd.add_feature(0, len(fwd), label=fwd.id)
            prd.add_feature(len(prd) - len(rev), len(prd),
                            label=rev.id, strand=-1)

            # The name is cut to 16 characters (Genbank LOCUS maximum).
            prd.name = "{0}bp_PCR_prod".format(len(prd))[:16]
            prd.id = "{0}bp {1}".format(str(len(prd))[:14], prd.seguid())
            prd.description = "Product_{0}_{1}".format(fwd.description,
                                                       rev.description)

            amplicon = Amplicon(prd,
                                template=region,
                                forward_primer=fwd,
                                reverse_primer=rev,
                                saltc=50,
                                forward_primer_concentration=1000,
                                reverse_primer_concentration=1000)
            self._products.append(amplicon)

            # Sanity check: neither strand may contain a blank.
            assert " " not in str(prd.seq.watson)
            assert " " not in str(prd.seq.crick)

    return self._products
def _assemble(self):
    """Find shared sequences between the fragments and assemble them.

    Works on ``self.dsrecs`` and stores all results on the instance:

    * ``self.no_of_olaps``      -- number of overlaps found
    * ``self.analyzed_dsrecs``  -- fragments (or their reverse
      complements) that share at least one overlap, annotated with
      ``"overlap"`` features
    * ``self.G``                -- MultiDiGraph whose nodes are overlap
      checksums plus the artificial end nodes ``'5'`` and ``'3'`` and
      whose edges carry the ``Fragment`` joining two overlaps
    * ``self.linear_products``  -- ``Contig`` objects, longest first
    * ``self.cG``               -- copy of ``self.G`` without end nodes
    * ``self.circular_products`` -- circular ``Contig`` objects, longest
      first, unique by ``cseguid()``

    Side effects on the input fragments: unnamed fragments are renamed
    ``frag<length>``, pre-existing ``"overlap"`` features are removed and
    ``seq`` is replaced by ``Dseq(seq.todata)``.

    Returns:
        None
    """

    def _random_light_color():
        # Each RGB channel is drawn from 175..255.
        return '#%02X%02X%02X' % (random.randint(175, 255),
                                  random.randint(175, 255),
                                  random.randint(175, 255))

    for dr in self.dsrecs:
        if dr.name in ("", ".", "<unknown name>", None):
            dr.name = "frag{}".format(len(dr))

    if self.only_terminal_overlaps:
        algorithm = terminal_overlap
    else:
        algorithm = common_sub_strings

    # ---- analyze overlaps ------------------------------------------------
    cols = {}  # overlap checksum -> (forward color, reverse color)

    for dsrec in self.dsrecs:
        dsrec.features = [f for f in dsrec.features if f.type != "overlap"]
        dsrec.seq = Dseq(dsrec.seq.todata)

    rcs = {dsrec: dsrec.rc() for dsrec in self.dsrecs}
    matches = []
    dsset = OrderedSet()

    for a, b in itertools.combinations(self.dsrecs, 2):
        # a against b, same orientation
        match = algorithm(str(a.seq).upper(), str(b.seq).upper(), self.limit)
        if match:
            matches.append((a, b, match))
            dsset.add(a)
            dsset.add(b)

        # a against the reverse complement of b ...
        match = algorithm(str(a.seq).upper(),
                          str(rcs[b].seq).upper(),
                          self.limit)
        if match:
            matches.append((a, rcs[b], match))
            dsset.add(a)
            dsset.add(rcs[b])
            # ... which also gives rc(a) against b, with the coordinates
            # mirrored on both fragments.
            matches.append(
                (rcs[a], b,
                 [(len(a) - sa - le, len(b) - sb - le, le)
                  for sa, sb, le in match]))
            dsset.add(b)
            dsset.add(rcs[a])

    self.no_of_olaps = 0

    for a, b, match in matches:
        for start_in_a, start_in_b, length in match:
            self.no_of_olaps += 1
            chksum = a[start_in_a:start_in_a + length].seguid()

            try:
                # Bind *rcol* here: the original bound an unused name
                # ("revcol"), leaving rcol stale from an earlier overlap
                # whenever the checksum had been seen before.
                fcol, rcol = cols[chksum]
            except KeyError:
                fcol = _random_light_color()
                rcol = _random_light_color()
                cols[chksum] = fcol, rcol

            qual = {
                "note": ["olp_{}".format(chksum)],
                "chksum": [chksum],
                "ApEinfo_fwdcolor": [fcol],
                "ApEinfo_revcolor": [rcol],
            }

            # Annotate each fragment at most once per overlap checksum.
            if chksum not in [f.qualifiers["chksum"][0]
                              for f in a.features if f.type == "overlap"]:
                a.features.append(
                    SeqFeature(FeatureLocation(start_in_a,
                                               start_in_a + length),
                               type="overlap",
                               qualifiers=qual))
            if chksum not in [f.qualifiers["chksum"][0]
                              for f in b.features if f.type == "overlap"]:
                b.features.append(
                    SeqFeature(FeatureLocation(start_in_b,
                                               start_in_b + length),
                               type="overlap",
                               qualifiers=qual))

    for ds in dsset:
        ds.features = sorted(ds.features,
                             key=operator.attrgetter("location.start"))

    self.analyzed_dsrecs = list(dsset)

    # ---- create graph ----------------------------------------------------
    self.G = nx.MultiDiGraph(multiedges=True,
                             name="original graph",
                             selfloops=False)
    self.G.add_node('5')
    self.G.add_node('3')

    for i, dsrec in enumerate(self.analyzed_dsrecs):
        # One overlap feature per checksum, ordered along the fragment.
        overlaps = sorted(
            {f.qualifiers['chksum'][0]: f
             for f in dsrec.features if f.type == 'overlap'}.values(),
            key=operator.attrgetter('location.start'))

        if overlaps:
            # Zero-length pseudo overlaps mark the two fragment ends.
            overlaps = (
                [SeqFeature(FeatureLocation(0, 0),
                            type='overlap',
                            qualifiers={'chksum': ['5']})]
                + overlaps
                + [SeqFeature(FeatureLocation(len(dsrec), len(dsrec)),
                              type='overlap',
                              qualifiers={'chksum': ['3']})])

        for olp1, olp2 in itertools.combinations(overlaps, 2):
            n1 = olp1.qualifiers['chksum'][0]
            n2 = olp2.qualifiers['chksum'][0]
            if n1 == '5' and n2 == '3':
                # End-to-end edge would just reproduce the input fragment.
                continue
            s1 = olp1.location.start.position
            e1 = olp1.location.end.position
            s2 = olp2.location.start.position
            e2 = olp2.location.end.position
            source_fragment = Fragment(dsrec, s1, e1, s2, e2, i)
            self.G.add_edge(n1, n2,
                            frag=source_fragment,
                            weight=s1 - e1,
                            i=i)

    # ---- linear assembly -------------------------------------------------
    linear_products = defaultdict(list)

    for path in all_simple_paths_edges(self.G, '5', '3',
                                       data=True,
                                       cutoff=self.max_nodes):
        # path[i][2] maps to edge attribute dicts; the last one is used.
        # list() is required because dict views have no .pop() on
        # Python 3.
        pred_frag = copy(list(path[0][2].values()).pop()['frag'])
        source_fragments = [pred_frag]

        # The original code branched on start2 < end1, but
        # start2 + (end1 - start2) == end1, so both branches took this
        # same slice.
        result = pred_frag[pred_frag.end1:pred_frag.end2]

        for first_node, second_node, edgedict in path[1:]:
            edgedict = list(edgedict.values()).pop()
            f = copy(edgedict['frag'])
            f.alignment = pred_frag.alignment + pred_frag.start2 - f.start1
            source_fragments.append(f)
            result += f[f.end1:f.end2]
            pred_frag = f

        # Skip products equal (in either orientation) to a product of the
        # same length found earlier or to one of the input fragments.
        result_seq = str(result.seq).lower()
        already_found = any(
            result_seq == str(lp.seq).lower()
            or result_seq == str(lp.seq.reverse_complement()).lower()
            for lp in linear_products[len(result)])
        is_input = already_found or any(
            result_seq == str(dsrec.seq).lower()
            or result_seq == str(dsrec.seq.reverse_complement()).lower()
            for dsrec in self.dsrecs)

        if not (already_found or is_input):
            linear_products[len(result)].append(
                Contig(result, source_fragments))

    self.linear_products = list(
        itertools.chain.from_iterable(
            linear_products[size]
            for size in sorted(linear_products, reverse=True)))

    # ---- circular assembly -----------------------------------------------
    self.cG = self.G.copy()
    self.cG.remove_nodes_from(('5', '3'))

    circular_products = {}  # cseguid -> Contig, so later duplicates replace

    for pth in all_circular_paths_edges(self.cG):
        # Rotate the cycle so it starts at the edge with the lowest
        # fragment index.
        ns = min(enumerate(pth), key=lambda x: x[1][2]['i'])[0]
        path = pth[ns:] + pth[:ns]

        pred_frag = copy(path[0][2]['frag'])
        source_fragments = [pred_frag]

        # Same slice for overlapping and non-overlapping flanks, see the
        # linear assembly above.
        result = pred_frag[pred_frag.end1:pred_frag.end2]
        result.seq = Dseq(str(result.seq))

        for first_node, second_node, edgedict in path[1:]:
            f = copy(edgedict['frag'])
            f.alignment = pred_frag.alignment + pred_frag.start2 - f.start1
            source_fragments.append(f)
            nxt = f[f.end1:f.end2]
            nxt.seq = Dseq(str(nxt.seq))
            result += nxt
            pred_frag = f

        r = Dseqrecord(result, circular=True)
        circular_products[r.cseguid()] = Contig(r, source_fragments)

    self.circular_products = sorted(circular_products.values(),
                                    key=len,
                                    reverse=True)
def _assemble(self):
    """Analyze overlaps between ``self.dsrecs`` and build the assemblies.

    The method proceeds in four stages and stores everything on the
    instance (it returns ``None``):

    1. Overlap analysis -- every pair of fragments is compared in both
       relative orientations with ``terminal_overlap`` or
       ``common_sub_strings`` (chosen by ``self.only_terminal_overlaps``)
       using ``self.limit``.  Each shared stretch is recorded as an
       ``"overlap"`` feature keyed by its seguid checksum.  Sets
       ``self.no_of_olaps`` and ``self.analyzed_dsrecs``.
    2. Graph construction -- ``self.G`` gets one node per overlap
       checksum plus the end nodes ``'5'`` and ``'3'``; each edge holds
       the ``Fragment`` that connects two overlaps on one sequence.
    3. Linear assembly -- every simple path ``'5'`` -> ``'3'`` (at most
       ``self.max_nodes`` long) becomes a ``Contig`` in
       ``self.linear_products`` unless it duplicates an earlier product
       or an input fragment.
    4. Circular assembly -- every cycle in ``self.cG`` (``self.G``
       without the end nodes) becomes a circular ``Contig`` in
       ``self.circular_products``, unique by ``cseguid()``.

    The input fragments are modified in place: empty/placeholder names
    become ``frag<length>``, old ``"overlap"`` features are dropped and
    ``seq`` is rebuilt as ``Dseq(seq.todata)``.
    """

    def _random_light_color():
        # Three channels, each drawn from 175..255.
        return '#%02X%02X%02X' % (random.randint(175, 255),
                                  random.randint(175, 255),
                                  random.randint(175, 255))

    for dr in self.dsrecs:
        if dr.name in ("", ".", "<unknown name>", None):
            dr.name = "frag{}".format(len(dr))

    if self.only_terminal_overlaps:
        algorithm = terminal_overlap
    else:
        algorithm = common_sub_strings

    # ---- 1. analyze overlaps ---------------------------------------------
    cols = {}  # checksum -> (forward color, reverse color)

    for dsrec in self.dsrecs:
        dsrec.features = [f for f in dsrec.features if f.type != "overlap"]
        dsrec.seq = Dseq(dsrec.seq.todata)

    rcs = {dsrec: dsrec.rc() for dsrec in self.dsrecs}
    matches = []
    dsset = OrderedSet()

    for a, b in itertools.combinations(self.dsrecs, 2):
        a_seq = str(a.seq).upper()

        match = algorithm(a_seq, str(b.seq).upper(), self.limit)
        if match:
            matches.append((a, b, match))
            dsset.add(a)
            dsset.add(b)

        match = algorithm(a_seq, str(rcs[b].seq).upper(), self.limit)
        if match:
            matches.append((a, rcs[b], match))
            dsset.add(a)
            dsset.add(rcs[b])
            # The same hits seen from rc(a) against b: mirror the start
            # coordinates on both fragments.
            mirrored = [(len(a) - sa - le, len(b) - sb - le, le)
                        for sa, sb, le in match]
            matches.append((rcs[a], b, mirrored))
            dsset.add(b)
            dsset.add(rcs[a])

    self.no_of_olaps = 0

    for a, b, match in matches:
        for start_in_a, start_in_b, length in match:
            self.no_of_olaps += 1
            chksum = a[start_in_a:start_in_a + length].seguid()

            try:
                # The cached pair must be unpacked into *rcol*; the
                # original unpacked into an unused "revcol", so a cache
                # hit reused the reverse color of an unrelated overlap.
                fcol, rcol = cols[chksum]
            except KeyError:
                fcol = _random_light_color()
                rcol = _random_light_color()
                cols[chksum] = fcol, rcol

            qual = {"note": ["olp_{}".format(chksum)],
                    "chksum": [chksum],
                    "ApEinfo_fwdcolor": [fcol],
                    "ApEinfo_revcolor": [rcol]}

            # A fragment gets at most one feature per overlap checksum.
            for rec, start in ((a, start_in_a), (b, start_in_b)):
                present = [f.qualifiers["chksum"][0]
                           for f in rec.features if f.type == "overlap"]
                if chksum not in present:
                    rec.features.append(
                        SeqFeature(FeatureLocation(start, start + length),
                                   type="overlap",
                                   qualifiers=qual))

    for ds in dsset:
        ds.features = sorted(ds.features,
                             key=operator.attrgetter("location.start"))

    self.analyzed_dsrecs = list(dsset)

    # ---- 2. create graph -------------------------------------------------
    self.G = nx.MultiDiGraph(multiedges=True,
                             name="original graph",
                             selfloops=False)
    self.G.add_node('5')
    self.G.add_node('3')

    for i, dsrec in enumerate(self.analyzed_dsrecs):
        unique_overlaps = {f.qualifiers['chksum'][0]: f
                           for f in dsrec.features if f.type == 'overlap'}
        overlaps = sorted(unique_overlaps.values(),
                          key=operator.attrgetter('location.start'))

        if overlaps:
            # Zero-length sentinels for the 5' and 3' fragment ends.
            five_prime = SeqFeature(FeatureLocation(0, 0),
                                    type='overlap',
                                    qualifiers={'chksum': ['5']})
            three_prime = SeqFeature(
                FeatureLocation(len(dsrec), len(dsrec)),
                type='overlap',
                qualifiers={'chksum': ['3']})
            overlaps = [five_prime] + overlaps + [three_prime]

        for olp1, olp2 in itertools.combinations(overlaps, 2):
            n1 = olp1.qualifiers['chksum'][0]
            n2 = olp2.qualifiers['chksum'][0]
            if n1 == '5' and n2 == '3':
                # No direct end-to-end edge for a single fragment.
                continue
            s1, e1, s2, e2 = (olp1.location.start.position,
                              olp1.location.end.position,
                              olp2.location.start.position,
                              olp2.location.end.position)
            source_fragment = Fragment(dsrec, s1, e1, s2, e2, i)
            self.G.add_edge(n1, n2,
                            frag=source_fragment,
                            weight=s1 - e1,
                            i=i)

    # ---- 3. linear assembly ----------------------------------------------
    linear_products = defaultdict(list)

    for path in all_simple_paths_edges(self.G, '5', '3',
                                       data=True,
                                       cutoff=self.max_nodes):
        # path[i][2] maps to edge attribute dicts; the last one is used.
        pred_frag = copy(list(path[0][2].values()).pop()['frag'])
        source_fragments = [pred_frag]

        # start2 + (end1 - start2) is just end1, so the original's two
        # branches (start2 < end1 or not) always took this slice.
        result = pred_frag[pred_frag.end1:pred_frag.end2]

        for first_node, second_node, edgedict in path[1:]:
            edgedict = list(edgedict.values()).pop()
            f = copy(edgedict['frag'])
            f.alignment = pred_frag.alignment + pred_frag.start2 - f.start1
            source_fragments.append(f)
            result += f[f.end1:f.end2]
            pred_frag = f

        # Reject products identical, in either orientation, to an earlier
        # product of the same length or to an input fragment.
        result_seq = str(result.seq).lower()
        same_length = linear_products[len(result)]
        duplicate = any(
            result_seq == str(lp.seq).lower()
            or result_seq == str(lp.seq.reverse_complement()).lower()
            for lp in same_length)
        if not duplicate:
            duplicate = any(
                result_seq == str(dsrec.seq).lower()
                or result_seq == str(dsrec.seq.reverse_complement()).lower()
                for dsrec in self.dsrecs)

        if not duplicate:
            same_length.append(Contig(result, source_fragments))

    self.linear_products = list(
        itertools.chain.from_iterable(
            linear_products[size]
            for size in sorted(linear_products, reverse=True)))

    # ---- 4. circular assembly --------------------------------------------
    self.cG = self.G.copy()
    self.cG.remove_nodes_from(('5', '3'))

    circular_products = {}  # cseguid -> Contig

    for pth in all_circular_paths_edges(self.cG):
        # Start each cycle at the edge with the smallest fragment index.
        ns = min(enumerate(pth), key=lambda x: x[1][2]['i'])[0]
        path = pth[ns:] + pth[:ns]

        pred_frag = copy(path[0][2]['frag'])
        source_fragments = [pred_frag]

        result = pred_frag[pred_frag.end1:pred_frag.end2]
        result.seq = Dseq(str(result.seq))

        for first_node, second_node, edgedict in path[1:]:
            f = copy(edgedict['frag'])
            f.alignment = pred_frag.alignment + pred_frag.start2 - f.start1
            source_fragments.append(f)
            nxt = f[f.end1:f.end2]
            nxt.seq = Dseq(str(nxt.seq))
            result += nxt
            pred_frag = f

        r = Dseqrecord(result, circular=True)
        circular_products[r.cseguid()] = Contig(r, source_fragments)

    self.circular_products = sorted(circular_products.values(),
                                    key=len,
                                    reverse=True)