def get_neighbors(self, target):
    """Return up to K known nodes closest to ``target`` by XOR distance.

    The search starts in the bucket reported by ``self.bucket_index(target)``
    and widens outwards, one bucket on each side per step, until at least K
    candidates have been gathered or no buckets remain.  Candidates are then
    ordered by ``intify(target) ^ intify(node.nid)``, smallest first.

    Returns:
        A new list of at most K nodes.  The list is empty when there are no
        buckets, when ``len(target) != 20``, or when the computed index lies
        past the last bucket.
    """
    if not self.buckets or len(target) != 20:
        return []

    index = self.bucket_index(target)
    try:
        # Copy the bucket's list: extending or sorting it in place would
        # silently rewrite the routing table's own bucket contents.
        nodes = list(self.buckets[index].nodes)
    except IndexError:
        return []

    bucket_count = len(self.buckets)
    lower, upper = index - 1, index + 1
    while len(nodes) < K and (lower >= 0 or upper < bucket_count):
        if lower >= 0:
            nodes.extend(self.buckets[lower].nodes)
        if upper < bucket_count:
            nodes.extend(self.buckets[upper].nodes)
        lower -= 1
        upper += 1

    num = intify(target)
    # key= gives the same ascending, stable order as the old cmp-based sort.
    nodes.sort(key=lambda node: num ^ intify(node.nid))
    return nodes[:K]
def findCloseNodes(self, target, n=K):
    """Find the ``n`` known nodes closest to a target node ID or infohash.

    The search starts in the bucket reported by ``self.bucketIndex(target)``
    and widens outwards, one bucket on each side per step, until at least
    ``n`` candidates have been gathered or no buckets remain.  Candidates are
    ordered by ``intify(target) ^ intify(node.nid)``, smallest first.

    Returns:
        A new list of at most ``n`` nodes; empty when there are no buckets or
        when the computed index lies past the last bucket.
    """
    if not self.buckets:
        return []

    index = self.bucketIndex(target)
    try:
        # Copy the bucket's list: extending or sorting it in place would
        # silently rewrite the routing table's own bucket contents.
        nodes = list(self.buckets[index].nodes)
    except IndexError:
        return []

    bucket_count = len(self.buckets)
    lower, upper = index - 1, index + 1
    while len(nodes) < n and (lower >= 0 or upper < bucket_count):
        # Still room to step towards lower bucket indexes.
        if lower >= 0:
            nodes.extend(self.buckets[lower].nodes)
        # Still room to step towards higher bucket indexes.
        if upper < bucket_count:
            nodes.extend(self.buckets[upper].nodes)
        lower -= 1
        upper += 1

    # Sort by XOR value, ascending (same stable order as the old cmp sort).
    num = intify(target)
    nodes.sort(key=lambda node: num ^ intify(node.nid))
    return nodes[:n]
def _solve(self):
    """Encode the fence-loop puzzle and run the solver.

    One boolean is created per horizontal and per vertical fence segment, the
    segments are required to form a single closed loop, and every cell that
    carries a clue must be bordered by exactly that many fences.

    Returns:
        ``(num_solutions, solution)`` where ``solution`` maps
        ``'horizontalFences'`` and ``'verticalFences'`` to nested lists of
        ``utils.intify`` results.
    """
    # Use 8 bits for efficiency
    set_bits(8)

    h_fences = utils.makeGrid(self.cols, self.rows + 1, lambda: BoolVar())
    v_fences = utils.makeGrid(self.rows, self.cols + 1, lambda: BoolVar())
    utils.require_single_closed_loop_v2(h_fences, v_fences)

    # A numbered cell is surrounded by exactly that many fences.
    for row in range(self.rows):
        for col in range(self.cols):
            clue = self.board.getCell(col, row)
            if clue is None:
                continue
            surrounding = [
                h_fences[row][col],
                h_fences[row + 1][col],
                v_fences[col][row],
                v_fences[col + 1][row],
            ]
            require(sum_bools(clue, surrounding))

    num_solutions = solve(quiet=True)

    def as_ints(grid):
        return [[utils.intify(var) for var in line] for line in grid]

    horizontal = as_ints(h_fences)
    vertical = as_ints(v_fences)
    solution = {
        'horizontalFences': horizontal,
        'verticalFences': vertical,
    }
    return (num_solutions, solution)
def findCloseNodes(self, target, n=K):
    """Find the ``n`` known nodes closest to a target node ID or infohash.

    Candidates are taken from the bucket at ``self.bucketIndex(target)`` and
    then from its neighbours, alternating outwards in both directions, until
    ``n`` candidates exist or every bucket has been visited.  The result is
    sorted by ``intify(target) ^ intify(node.nid)`` in ascending order.

    Returns:
        A new list of at most ``n`` nodes; empty when there are no buckets or
        when the computed index lies past the last bucket.
    """
    if not self.buckets:
        return []

    index = self.bucketIndex(target)
    try:
        # Work on a copy so that extend()/sort() below never modify the
        # node list owned by the bucket itself.
        nodes = list(self.buckets[index].nodes)
    except IndexError:
        return []

    total = len(self.buckets)
    below, above = index - 1, index + 1
    while len(nodes) < n and (below >= 0 or above < total):
        # Still room to step towards lower bucket indexes.
        if below >= 0:
            nodes.extend(self.buckets[below].nodes)
        # Still room to step towards higher bucket indexes.
        if above < total:
            nodes.extend(self.buckets[above].nodes)
        below -= 1
        above += 1

    # Sort by XOR value, ascending (same stable order as the old cmp sort).
    num = intify(target)
    nodes.sort(key=lambda node: num ^ intify(node.nid))
    return nodes[:n]
def get_seqfile_info(fname, is_data, germline_seqs=None, cyst_positions=None, tryp_positions=None, n_max_queries=-1, queries=None, reco_ids=None):
    """Read sequence records from a csv, tsv, fasta or fastq file.

    The format is chosen by substring match on ``fname`` ('.csv', '.tsv',
    '.fasta'/'.fa', '.fastq'/'.fq').  csv input uses the columns
    'unique_id'/'seq'; tsv input uses 'name'/'nucleotide'.

    Args:
        fname: path of the input file.
        is_data: when false, the germline/position arguments are required and
            every kept line is also stored in ``reco_info`` and passed to
            ``utils.add_match_info``.
        germline_seqs, cyst_positions, tryp_positions: forwarded to
            ``utils.add_match_info``.
        n_max_queries: stop after this many kept records (ignored if <= 0).
        queries: if not None, keep only records whose name is in it.
        reco_ids: if not None, keep only records whose 'reco_id' is in it.

    Returns:
        ``(input_info, reco_info)``, two OrderedDicts keyed by record name.

    Raises:
        AssertionError: unrecognized file format, or no record was kept.
    """
    if not is_data:
        assert germline_seqs is not None
        assert cyst_positions is not None
        assert tryp_positions is not None

    seqfile = None  # only set for csv/tsv input; closed once reading is done
    if '.csv' in fname:
        name_column, seq_column = 'unique_id', 'seq'
        seqfile = opener('r')(fname)
        reader = csv.DictReader(seqfile, delimiter=',')
    elif '.tsv' in fname:
        name_column, seq_column = 'name', 'nucleotide'
        seqfile = opener('r')(fname)
        reader = csv.DictReader(seqfile, delimiter='\t')
    elif '.fasta' in fname or '.fa' in fname or '.fastq' in fname or '.fq' in fname:
        name_column, seq_column = 'unique_id', 'seq'
        # '.fa' is a substring of '.fastq', so the fastq suffixes have to be
        # tested first; otherwise fastq files would be parsed as fasta.
        ftype = 'fastq' if ('.fastq' in fname or '.fq' in fname) else 'fasta'
        reader = []
        for seq_record in SeqIO.parse(fname, ftype):
            reader.append({name_column: seq_record.name,
                           seq_column: str(seq_record.seq).upper()})
            if n_max_queries > 0 and len(reader) >= n_max_queries:
                break
    else:
        print('ERROR unrecognized file format %s' % fname)
        # Explicit raise: a bare `assert False` disappears under `python -O`.
        raise AssertionError('unrecognized file format %s' % fname)

    input_info, reco_info = OrderedDict(), OrderedDict()
    n_queries = 0
    try:
        for line in reader:
            utils.intify(line)
            # if command line specified query or reco ids, skip other ones
            if queries is not None and line[name_column] not in queries:
                continue
            if reco_ids is not None and line['reco_id'] not in reco_ids:
                continue

            input_info[line[name_column]] = {'unique_id': line[name_column], 'seq': line[seq_column]}
            if not is_data:
                reco_info[line['unique_id']] = line
                utils.add_match_info(germline_seqs, line, cyst_positions, tryp_positions)
            n_queries += 1
            if n_max_queries > 0 and n_queries >= n_max_queries:
                break
    finally:
        if seqfile is not None:
            seqfile.close()

    if len(input_info) == 0:
        print('ERROR didn\'t end up pulling any input info out of %s' % fname)
        raise AssertionError('no input info in %s' % fname)
    return (input_info, reco_info)
def _solve(self):
    """Encode the circle-loop puzzle and run the solver.

    One boolean is created per segment between horizontally or vertically
    adjacent cells; the segments must form a single closed loop.  Cells with
    value 1 (white circle) and 2 (black circle) add the local shape
    constraints built below.

    Returns:
        ``(num_solutions, solution)`` where ``solution`` maps
        ``'horizontalFences'`` and ``'verticalFences'`` to nested lists of
        ``utils.intify`` results.
    """
    h_seg = utils.makeGrid(self.cols - 1, self.rows, lambda: BoolVar())
    v_seg = utils.makeGrid(self.rows - 1, self.cols, lambda: BoolVar())
    utils.require_single_closed_loop_v2(h_seg, v_seg)

    def hori(cx, cy):
        # Horizontal segment at (cx, cy), or False when outside the grid.
        if 0 <= cy < self.rows and 0 <= cx < self.cols - 1:
            return h_seg[cy][cx]
        return False

    def vert(cx, cy):
        # Vertical segment at (cx, cy), or False when outside the grid.
        if 0 <= cy < self.rows - 1 and 0 <= cx < self.cols:
            return v_seg[cx][cy]
        return False

    for y in range(self.rows):
        for x in range(self.cols):
            kind = self.board.getCell(x, y)

            # Requirements on white circles
            if kind == 1:
                through_turn_left = (
                    hori(x - 1, y) & hori(x, y)
                    & (vert(x - 1, y - 1) | vert(x - 1, y)))
                through_turn_right = (
                    hori(x - 1, y) & hori(x, y)
                    & (vert(x + 1, y - 1) | vert(x + 1, y)))
                through_turn_top = (
                    vert(x, y - 1) & vert(x, y)
                    & (hori(x - 1, y - 1) | hori(x, y - 1)))
                through_turn_bottom = (
                    vert(x, y - 1) & vert(x, y)
                    & (hori(x - 1, y + 1) | hori(x, y + 1)))
                require(through_turn_left | through_turn_right
                        | through_turn_top | through_turn_bottom)

            # Requirements on black circles
            if kind == 2:
                corner_tl = (hori(x - 2, y) & hori(x - 1, y)
                             & vert(x, y - 1) & vert(x, y - 2))
                corner_tr = (hori(x, y) & hori(x + 1, y)
                             & vert(x, y - 1) & vert(x, y - 2))
                corner_bl = (hori(x - 2, y) & hori(x - 1, y)
                             & vert(x, y) & vert(x, y + 1))
                corner_br = (hori(x, y) & hori(x + 1, y)
                             & vert(x, y) & vert(x, y + 1))
                require(corner_tl | corner_tr | corner_bl | corner_br)

    num_solutions = solve(quiet=True)

    horizontal = [[utils.intify(seg) for seg in line] for line in h_seg]
    vertical = [[utils.intify(seg) for seg in line] for line in v_seg]
    solution = {
        'horizontalFences': horizontal,
        'verticalFences': vertical,
    }
    return (num_solutions, solution)
def _mean(soup):
    """Extract the mean value of this row, annualized when it is monthly.

    The text is read from the first ``span.minor`` inside the first
    ``td.mean``.  If that text contains "mo" the value is treated as monthly
    and multiplied by 12.

    Returns:
        ``(mean, monthly)``: the (possibly annualized) integer and a flag
        telling whether the figure was for a monthly period.
    """
    mean_cell = soup.findAll('td', {'class': 'mean'})[0]
    mean_text = mean_cell.findAll('span', {'class': 'minor'})[0].text
    monthly = "mo" in mean_text
    value = intify(mean_text)
    if monthly:
        value = value * 12
    return value, monthly
def _connections(soup):
    """Return the integer from the ``OverviewInsideConnections`` div.

    Reads the text of the first ``tt.notranslate`` inside the first div with
    that id and converts it with ``intify``.
    """
    container = soup.findAll('div', {'id': 'OverviewInsideConnections'})[0]
    raw_count = container.findAll('tt', {'class': 'notranslate'})[0].text
    return intify(raw_count)
def _approval(soup):
    """Return the integer from the first ``span.approvalPercent``.

    Reads the text of the first ``tt.notranslate`` inside that span and
    converts it with ``intify``.
    """
    percent_span = soup.findAll('span', {'class': 'approvalPercent'})[0]
    raw_percent = percent_span.findAll('tt', {'class': 'notranslate'})[0].text
    return intify(raw_percent)
def _reviews(soup):
    """Return the integer from the first ``span`` of class ``numCEORatings minor``.

    Reads the text of the first ``tt.notranslate`` inside that span and
    converts it with ``intify``.
    """
    rating_span = soup.findAll('span', {'class': 'numCEORatings minor'})[0]
    raw_total = rating_span.findAll('tt', {'class': 'notranslate'})[0].text
    return intify(raw_total)
def bucketIndex(self, target):
    """Locate the index of the bucket for a given node ID or infohash.

    The index is the ``bisect_left`` insertion point of ``intify(target)`` in
    ``self.buckets``.

    Raises:
        HashError: propagated unchanged from the conversion/lookup.  The
            previous ``except HashError: raise HashError`` replaced the
            original exception with a blank one, discarding its message and
            traceback.
    """
    return bisect_left(self.buckets, intify(target))
def _solve(self):
    """Encode the star-placement puzzle and run the solver.

    One boolean per cell says whether it holds a star.  Every row, every
    column and every region (cells reachable without crossing a border in
    ``self.board.border``) must contain exactly ``self.starCount`` stars, and
    a star must be the only star in the 3x3 window centred on it.

    Returns:
        ``(num_solutions, solution)`` where ``solution`` is a nested list of
        ``utils.intify`` results, one per cell.
    """
    grid = utils.makeGrid(self.cols, self.rows, lambda: BoolVar())

    # starCount stars per row
    for row in range(self.rows):
        require(sum_bools(self.starCount, grid[row]))

    # starCount stars per column
    for col in range(self.cols):
        column = []
        for row in range(self.rows):
            column.append(grid[row][col])
        require(sum_bools(self.starCount, column))

    # stars can't be adjacent (including diagonal)
    offsets = list(itertools.product([-1, 0, 1], [-1, 0, 1]))
    for row in range(self.rows):
        for col in range(self.cols):
            window = []
            for (dx, dy) in offsets:
                ny, nx = row + dy, col + dx
                inside = 0 <= ny < self.rows and 0 <= nx < self.cols
                window.append(grid[ny][nx] if inside else False)
            # The window includes the centre cell, hence "exactly one".
            require(cond(grid[row][col], sum_bools(1, window), True))

    # starCount stars per region, regions found by flood fill.
    visited = utils.makeGrid(self.cols, self.rows, lambda: False)
    border = self.board.border
    for row in range(self.rows):
        for col in range(self.cols):
            if visited[row][col]:
                continue
            frontier = [(col, row)]
            members = []
            while frontier:
                for (px, py) in frontier:
                    visited[py][px] = True
                    members.append(grid[py][px])
                discovered = []
                for (px, py) in frontier:
                    if (px - 1 >= 0 and not visited[py][px - 1]
                            and not border[1][py][px - 1]):
                        discovered.append((px - 1, py))
                    if (px + 1 < self.cols and not visited[py][px + 1]
                            and not border[1][py][px]):
                        discovered.append((px + 1, py))
                    if (py - 1 >= 0 and not visited[py - 1][px]
                            and not border[0][py - 1][px]):
                        discovered.append((px, py - 1))
                    if (py + 1 < self.rows and not visited[py + 1][px]
                            and not border[0][py][px]):
                        discovered.append((px, py + 1))
                # De-duplicate cells reached from more than one neighbour.
                frontier = list(set(discovered))
            require(sum_bools(self.starCount, members))

    num_solutions = solve(quiet=True)
    solution = [[utils.intify(cell) for cell in line] for line in grid]
    return (num_solutions, solution)
def _solve(self):
    """Encode the light-placement puzzle and run the solver.

    ``ans[y][x]`` says whether a light is placed on the cell and
    ``lit[y][x]`` is an atom that must be proven for every non-black cell.
    A cell whose board value is not None is black: it holds no light, and if
    its value is >= 0 exactly that many of its four neighbours hold a light.
    Within each maximal horizontal or vertical run of non-black cells at most
    one light is allowed, and any light in a run proves every cell of that
    run lit.

    Returns:
        ``(num_solutions, solution)`` where ``solution`` is a flat, row-major
        list of ``utils.intify`` results.
    """
    ans = utils.makeGrid(self.cols, self.rows, lambda: BoolVar())
    lit = utils.makeGrid(self.cols, self.rows, lambda: Atom())

    def constrain_run(lights, lit_atoms):
        # Lights must not shine on each other; any light lights the whole run.
        require(at_most(1, lights))
        for light in lights:
            for atom in lit_atoms:
                atom.prove_if(light)

    for y in range(self.rows):
        for x in range(self.cols):
            cell = self.board.getCell(x, y)

            if cell is not None:
                # Lights can't be on black squares
                require(~ans[y][x])
                # Require the right number of neighbors for lights
                if cell >= 0:
                    neighbors = [
                        ans[y - 1][x] if y - 1 >= 0 else False,
                        ans[y + 1][x] if y + 1 < self.rows else False,
                        ans[y][x - 1] if x - 1 >= 0 else False,
                        ans[y][x + 1] if x + 1 < self.cols else False,
                    ]
                    require(sum_bools(cell, neighbors))
                continue

            # Every non-black cell has to end up lit.
            require(lit[y][x])

            # This cell starts a vertical run (top edge or black cell above).
            if y == 0 or self.board.getCell(x, y - 1) is not None:
                lights, lit_atoms = [], []
                yy = y
                while yy < self.rows and self.board.getCell(x, yy) is None:
                    lights.append(ans[yy][x])
                    lit_atoms.append(lit[yy][x])
                    yy += 1
                constrain_run(lights, lit_atoms)

            # This cell starts a horizontal run (left edge or black cell to the left).
            if x == 0 or self.board.getCell(x - 1, y) is not None:
                lights, lit_atoms = [], []
                xx = x
                while xx < self.cols and self.board.getCell(xx, y) is None:
                    lights.append(ans[y][xx])
                    lit_atoms.append(lit[y][xx])
                    xx += 1
                constrain_run(lights, lit_atoms)

    num_solutions = solve(quiet=True)
    # Row-major flattening; avoids `i / self.cols`, which yields a float (and
    # an invalid index) under true division.
    solution = [
        utils.intify(ans[y][x])
        for y in range(self.rows)
        for x in range(self.cols)
    ]
    return (num_solutions, solution)
def _solve(self):
    """Encode the 9x9 all-different puzzle and run the solver.

    Each cell is an integer variable in 1..9.  Every row, every column and
    every 3x3 box must hold pairwise different values, and cells given on the
    board are fixed to their given value.

    Returns:
        ``(num_solutions, solution)`` where ``solution`` is a flat, row-major
        list of 81 ``utils.intify`` results.
    """
    ans = utils.makeGrid(9, 9, lambda: IntVar(1, 9))

    for i in range(9):
        require_all_diff(ans[i])
        require_all_diff([ans[j][i] for j in range(9)])
        # Box i covers rows 3*(i // 3).. and columns 3*(i % 3)..; floor
        # division is spelled `//` so the indexes stay integers even under
        # true division.
        require_all_diff([
            ans[j // 3 + i // 3 * 3][j % 3 + i % 3 * 3] for j in range(9)
        ])

    for y in range(9):
        for x in range(9):
            given = self.board.getCell(x, y)
            if given is not None:
                require(ans[y][x] == given)

    num_solutions = solve(quiet=True)
    solution = [utils.intify(ans[y][x]) for y in range(9) for x in range(9)]
    return (num_solutions, solution)
def read_hmm_output(self, algorithm, hmm_csv_outfname, make_clusters=True, count_parameters=False, parameter_out_dir=None, plotdir=None):
    """Read the hmm csv output file and collect per-query information.

    Args:
        algorithm: 'viterbi' selects the annotation branch; any other value
            is handled as forward scores.
        hmm_csv_outfname: path of the csv file to read.
        make_clusters: when false, each forward score line is printed.
        count_parameters: when true, parameter counters are filled, written
            to ``parameter_out_dir`` and (unless ``self.args.no_plot``)
            plotted to ``plotdir``; both directories are then required.
        parameter_out_dir: output directory for the parameter counters.
        plotdir: base directory for plots.

    Returns:
        A list of dicts.  For viterbi, a copy of each kept line (with a
        'unique_id' entry); for forward, ``{'id_a', 'id_b', 'score'}`` for
        every line that has exactly two ids.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source -- confirm that the counter/perfplotter updates belong under the
    # productive-rearrangement check.
    print(' read output')
    if count_parameters:
        assert parameter_out_dir is not None
        assert plotdir is not None
    pcounter = ParameterCounter(self.germline_seqs) if count_parameters else None
    true_pcounter = ParameterCounter(self.germline_seqs) if (count_parameters and not self.args.is_data) else None
    perfplotter = PerformancePlotter(self.germline_seqs, plotdir + '/hmm/performance', 'hmm') if self.args.plot_performance else None

    n_processed = 0
    hmminfo = []
    boundary_error_queries = []
    with opener('r')(hmm_csv_outfname) as hmm_csv_outfile:
        reader = csv.DictReader(hmm_csv_outfile)
        last_key = None
        for line in reader:
            utils.intify(line, splitargs=('unique_ids', 'seqs'))
            ids = line['unique_ids']
            this_key = utils.get_key(ids)
            same_event = from_same_event(self.args.is_data, True, self.reco_info, ids)
            id_str = ''.join(['%20s ' % i for i in ids])
            # True on the first line for this set of ids (i.e. the best
            # viterbi path or the only forward score).
            is_first_line = last_key != this_key

            # check for errors
            if is_first_line:
                errors = line['errors']
                if errors is not None and 'boundary' in errors.split(':'):
                    boundary_error_queries.append(':'.join([str(uid) for uid in ids]))
                else:
                    # `not errors` also accepts None; len(None) used to raise
                    # TypeError here although None is explicitly anticipated.
                    assert not errors

            if algorithm == 'viterbi':
                line['seq'] = line['seqs'][0]  # add info for the best match as 'seq'
                line['unique_id'] = ids[0]
                utils.add_match_info(self.germline_seqs, line, self.cyst_positions, self.tryp_positions, debug=(self.args.debug > 0))

                if is_first_line or self.args.plot_all_best_events:
                    n_processed += 1
                    if self.args.debug:
                        print('%s %d' % (id_str, same_event))
                    # if it's productive, or if we're not skipping unproductive rearrangements
                    if line['cdr3_length'] != -1 or not self.args.skip_unproductive:
                        # Copy of the line; keys already present in `line`
                        # win over the seeded 'unique_id', as before.
                        entry = {'unique_id': line['unique_ids'][0]}
                        entry.update(line)
                        hmminfo.append(entry)
                        if pcounter is not None:  # increment counters (but only for the best [first] match)
                            pcounter.increment(line)
                        if true_pcounter is not None:  # increment true counters
                            true_pcounter.increment(self.reco_info[ids[0]])
                        if perfplotter is not None:
                            perfplotter.evaluate(self.reco_info[ids[0]], line)

                if self.args.debug:
                    self.print_hmm_output(line, print_true=is_first_line, perfplotter=perfplotter)
                line['seq'] = None
                line['unique_id'] = None
            else:  # for forward, write the pair scores to file to be read by the clusterer
                if not make_clusters:
                    print('%3d %10.3f %s' % (same_event, float(line['score']), id_str))
                if line['score'] == '-nan':
                    print(' WARNING encountered -nan, setting to -999999.0')
                    score = -999999.0
                else:
                    score = float(line['score'])
                if len(ids) == 2:
                    hmminfo.append({'id_a': line['unique_ids'][0], 'id_b': line['unique_ids'][1], 'score': score})
                n_processed += 1

            last_key = this_key

    if pcounter is not None:
        pcounter.write(parameter_out_dir)
        if not self.args.no_plot:
            pcounter.plot(plotdir, subset_by_gene=True, cyst_positions=self.cyst_positions, tryp_positions=self.tryp_positions)
    if true_pcounter is not None:
        true_pcounter.write(parameter_out_dir + '/true')
        if not self.args.no_plot:
            true_pcounter.plot(plotdir + '/true', subset_by_gene=True, cyst_positions=self.cyst_positions, tryp_positions=self.tryp_positions)
    if perfplotter is not None:
        perfplotter.plot()

    print(' processed %d queries' % n_processed)
    if len(boundary_error_queries) > 0:
        print(' %d boundary errors (%s)' % (len(boundary_error_queries), ', '.join(boundary_error_queries)))
    return hmminfo
def _ratings(soup):
    """Number of times this company has been rated by employees.

    Taken from the first ``span.notranslate`` inside the first ``h3``; the
    text is stripped before being converted with ``intify``.
    """
    heading = soup.findAll('h3')[0]
    count_span = heading.findAll('span', {'class': 'notranslate'})[0]
    stripped = count_span.text.strip()
    return intify(stripped)
def _size(soup):
    """Return the integers found in the first ``moreData margTop5 subtle`` div.

    Every ``tt.notranslate`` inside that div contributes one ``intify``-ed
    value, in document order.
    """
    data_div = soup.findAll('div', {'class': 'moreData margTop5 subtle'})[0]
    values = []
    for tag in data_div.findAll('tt', {'class': 'notranslate'}):
        values.append(intify(tag.text))
    return values
def _reviews(soup):
    """Return the integer from the first ``span`` of class ``numReviews subtle``.

    Reads the text of the first ``span.txtShadowWhite`` nested inside it and
    converts it with ``intify``.
    """
    outer_span = soup.findAll('span', {'class': 'numReviews subtle'})[0]
    inner_span = outer_span.findAll('span', {'class': 'txtShadowWhite'})[0]
    return intify(inner_span.text)
def _solve(self):
    """Encode the visibility-clue puzzle and run the solver.

    ``self.board.excell`` is sliced into the top, bottom, left and right
    clue lists (in that order).  Each cell is an integer in 1..``self.cols``
    and rows and columns hold pairwise different values.  A cell is
    "viewable" from a side when it is larger than every cell between it and
    that side; each clue that is not None fixes the number of viewable cells
    in its row or column.

    Returns:
        ``(num_solutions, solution)`` where ``solution`` is a flat, row-major
        list of ``utils.intify`` results.
    """
    clues = self.board.excell
    top = clues[0:self.cols]
    bottom = clues[self.cols:2 * self.cols]
    left = clues[2 * self.cols:2 * self.cols + self.rows]
    right = clues[2 * self.cols + self.rows:]

    ans = utils.makeGrid(self.cols, self.rows, lambda: IntVar(1, self.cols))
    topViewable = utils.makeGrid(self.cols, self.rows, lambda: Atom())
    bottomViewable = utils.makeGrid(self.cols, self.rows, lambda: Atom())
    leftViewable = utils.makeGrid(self.cols, self.rows, lambda: Atom())
    rightViewable = utils.makeGrid(self.cols, self.rows, lambda: Atom())

    # All numbers in columns and rows are distinct
    for y in range(self.rows):
        require_all_diff(ans[y])
    for x in range(self.cols):
        require_all_diff([ans[y][x] for y in range(self.rows)])

    def taller_than_all(height, others):
        # Conjunction of `height > other`; plain True when nothing is in front.
        result = True
        for other in others:
            result = result & (height > other)
        return result

    # Prove skyscraper viewability
    for y in range(self.rows):
        for x in range(self.cols):
            height = ans[y][x]
            topViewable[y][x].prove_if(
                taller_than_all(height, [ans[k][x] for k in range(y)]))
            bottomViewable[y][x].prove_if(
                taller_than_all(height, [ans[k][x] for k in range(y + 1, self.rows)]))
            leftViewable[y][x].prove_if(
                taller_than_all(height, [ans[y][k] for k in range(x)]))
            rightViewable[y][x].prove_if(
                taller_than_all(height, [ans[y][k] for k in range(x + 1, self.cols)]))

    # Ensure skyscraper viewable numbers
    for x in range(self.cols):
        if top[x] is not None:
            require(sum_bools(top[x], [topViewable[y][x] for y in range(self.rows)]))
        if bottom[x] is not None:
            require(sum_bools(bottom[x], [bottomViewable[y][x] for y in range(self.rows)]))
    for y in range(self.rows):
        if left[y] is not None:
            require(sum_bools(left[y], [leftViewable[y][x] for x in range(self.cols)]))
        if right[y] is not None:
            require(sum_bools(right[y], [rightViewable[y][x] for x in range(self.cols)]))

    num_solutions = solve(quiet=True)
    # Row-major flattening; avoids `i / self.cols`, which yields a float (and
    # an invalid index) under true division.
    solution = [
        utils.intify(ans[y][x])
        for y in range(self.rows)
        for x in range(self.cols)
    ]
    return (num_solutions, solution)
def bucketIndex(self, target):
    """Locate the index of the bucket for a given node ID or infohash.

    The index is the ``bisect_left`` insertion point of ``intify(target)``
    within ``self.buckets``.
    """
    numeric_target = intify(target)
    position = bisect_left(self.buckets, numeric_target)
    return position
def in_range(self, target):
    """Tell whether ``intify(target)`` lies in ``[self.min, self.max)``."""
    value = intify(target)
    if value < self.min:
        return False
    return value < self.max
def inRange(self, target):
    """Whether the target node ID falls inside this range.

    The range is half-open: ``self.min`` is included, ``self.max`` is not.
    """
    value = intify(target)
    if value < self.min:
        return False
    return value < self.max
def read_hmm_output(self,
                    algorithm,
                    hmm_csv_outfname,
                    make_clusters=True,
                    count_parameters=False,
                    parameter_out_dir=None,
                    plotdir=None):
    """Read the hmm csv output file and collect per-query information.

    Args:
        algorithm: 'viterbi' selects the annotation branch; any other value
            is handled as forward scores.
        hmm_csv_outfname: path of the csv file to read.
        make_clusters: when false, each forward score line is printed.
        count_parameters: when true, parameter counters are filled, written
            to ``parameter_out_dir`` and (unless ``self.args.no_plot``)
            plotted to ``plotdir``; both directories are then required.
        parameter_out_dir: output directory for the parameter counters.
        plotdir: base directory for plots.

    Returns:
        A list of dicts.  For viterbi, a copy of each kept line (with a
        'unique_id' entry); for forward, ``{'id_a', 'id_b', 'score'}`` for
        every line that has exactly two ids.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source -- confirm that the counter/perfplotter updates belong under the
    # productive-rearrangement check.
    print(' read output')
    if count_parameters:
        assert parameter_out_dir is not None
        assert plotdir is not None
    pcounter = ParameterCounter(
        self.germline_seqs) if count_parameters else None
    true_pcounter = ParameterCounter(self.germline_seqs) if (
        count_parameters and not self.args.is_data) else None
    perfplotter = PerformancePlotter(
        self.germline_seqs, plotdir + '/hmm/performance',
        'hmm') if self.args.plot_performance else None

    n_processed = 0
    hmminfo = []
    boundary_error_queries = []
    with opener('r')(hmm_csv_outfname) as hmm_csv_outfile:
        reader = csv.DictReader(hmm_csv_outfile)
        last_key = None
        for line in reader:
            utils.intify(line, splitargs=('unique_ids', 'seqs'))
            ids = line['unique_ids']
            this_key = utils.get_key(ids)
            same_event = from_same_event(self.args.is_data, True,
                                         self.reco_info, ids)
            id_str = ''.join(['%20s ' % i for i in ids])
            # True on the first line for this set of ids (i.e. the best
            # viterbi path or the only forward score).
            is_first_line = last_key != this_key

            # check for errors
            if is_first_line:
                errors = line['errors']
                if errors is not None and 'boundary' in errors.split(':'):
                    boundary_error_queries.append(':'.join(
                        [str(uid) for uid in ids]))
                else:
                    # `not errors` also accepts None; len(None) used to raise
                    # TypeError here although None is explicitly anticipated.
                    assert not errors

            if algorithm == 'viterbi':
                # add info for the best match as 'seq'
                line['seq'] = line['seqs'][0]
                line['unique_id'] = ids[0]
                utils.add_match_info(self.germline_seqs,
                                     line,
                                     self.cyst_positions,
                                     self.tryp_positions,
                                     debug=(self.args.debug > 0))

                if is_first_line or self.args.plot_all_best_events:
                    n_processed += 1
                    if self.args.debug:
                        print('%s %d' % (id_str, same_event))
                    # if it's productive, or if we're not skipping
                    # unproductive rearrangements
                    if (line['cdr3_length'] != -1
                            or not self.args.skip_unproductive):
                        # Copy of the line; keys already present in `line`
                        # win over the seeded 'unique_id', as before.
                        entry = {'unique_id': line['unique_ids'][0]}
                        entry.update(line)
                        hmminfo.append(entry)
                        # increment counters (but only for the best [first] match)
                        if pcounter is not None:
                            pcounter.increment(line)
                        # increment true counters
                        if true_pcounter is not None:
                            true_pcounter.increment(self.reco_info[ids[0]])
                        if perfplotter is not None:
                            perfplotter.evaluate(self.reco_info[ids[0]], line)

                if self.args.debug:
                    self.print_hmm_output(line,
                                          print_true=is_first_line,
                                          perfplotter=perfplotter)
                line['seq'] = None
                line['unique_id'] = None
            else:
                # for forward, write the pair scores to file to be read by
                # the clusterer
                if not make_clusters:
                    print('%3d %10.3f %s' % (
                        same_event, float(line['score']), id_str))
                if line['score'] == '-nan':
                    print(' WARNING encountered -nan, setting to -999999.0')
                    score = -999999.0
                else:
                    score = float(line['score'])
                if len(ids) == 2:
                    hmminfo.append({
                        'id_a': line['unique_ids'][0],
                        'id_b': line['unique_ids'][1],
                        'score': score
                    })
                n_processed += 1

            last_key = this_key

    if pcounter is not None:
        pcounter.write(parameter_out_dir)
        if not self.args.no_plot:
            pcounter.plot(plotdir,
                          subset_by_gene=True,
                          cyst_positions=self.cyst_positions,
                          tryp_positions=self.tryp_positions)
    if true_pcounter is not None:
        true_pcounter.write(parameter_out_dir + '/true')
        if not self.args.no_plot:
            true_pcounter.plot(plotdir + '/true',
                               subset_by_gene=True,
                               cyst_positions=self.cyst_positions,
                               tryp_positions=self.tryp_positions)
    if perfplotter is not None:
        perfplotter.plot()

    print(' processed %d queries' % n_processed)
    if len(boundary_error_queries) > 0:
        print(' %d boundary errors (%s)' % (
            len(boundary_error_queries), ', '.join(boundary_error_queries)))
    return hmminfo
def _solve(self):
    """Encode the region-size puzzle and run the solver.

    ``ans[y][x]`` is the number in each cell; given cells are fixed.  Equal
    neighbouring cells are tracked as "edges".  A ``flow`` variable per cell
    ('.' for a root, otherwise an arrow towards a neighbour) organises equal
    cells into trees: every cell must be connected to a root, the subtree
    size counted at a root must equal the root's number, and neighbouring
    cells share a root index exactly when they hold the same number.

    Returns:
        ``(num_solutions, solution)`` where ``solution`` is a flat, row-major
        list of ``utils.intify`` results.
    """
    set_max_val(self.rows * self.cols)
    ans = utils.makeGrid(self.cols, self.rows, lambda: IntVar())

    for y in range(self.rows):
        for x in range(self.cols):
            given = self.board.getCell(x, y)
            if given is not None:
                require(ans[y][x] == given)

    # Count number of same neighbors for special restrictions on 1's, 2's,
    # 3's, and 4's.
    edgesHorizontal = [[] for _ in range(self.rows)]
    edgesVertical = [[] for _ in range(self.rows - 1)]
    sameNeighbors = [[] for _ in range(self.rows)]
    for y in range(self.rows):
        for x in range(self.cols):
            if y + 1 < self.rows:
                edgesVertical[y].append(ans[y][x] == ans[y + 1][x])
            if x + 1 < self.cols:
                edgesHorizontal[y].append(ans[y][x] == ans[y][x + 1])

    for y in range(self.rows):
        for x in range(self.cols):
            sameVars = []
            if y - 1 >= 0:
                sameVars.append(edgesVertical[y - 1][x])
            if y + 1 < self.rows:
                sameVars.append(edgesVertical[y][x])
            if x - 1 >= 0:
                sameVars.append(edgesHorizontal[y][x - 1])
            if x + 1 < self.cols:
                sameVars.append(edgesHorizontal[y][x])
            sameNeighbors[y].append(sum_vars(sameVars))

    for y in range(self.rows):
        for x in range(self.cols):
            value = ans[y][x]
            same = sameNeighbors[y][x]
            require(~((value == 1) ^ (same == 0)))
            require(~((value == 2) & (same != 1)))
            require(~((value == 3) & (same != 1) & (same != 2)))
            require(~((value == 4) & (same == 4)))

    flow = utils.makeGrid(self.cols, self.rows,
                          lambda: MultiVar('.', '>', '<', 'v', '^'))

    # Require cells that flow into each other to have the same value.
    for y in range(self.rows):
        for x in range(self.cols):
            if y - 1 >= 0:
                require(cond(flow[y][x] == '^', edgesVertical[y - 1][x], True))
            else:
                require(flow[y][x] != '^')
            if y + 1 < self.rows:
                require(cond(flow[y][x] == 'v', edgesVertical[y][x], True))
            else:
                require(flow[y][x] != 'v')
            if x - 1 >= 0:
                require(cond(flow[y][x] == '<', edgesHorizontal[y][x - 1], True))
            else:
                require(flow[y][x] != '<')
            if x + 1 < self.cols:
                require(cond(flow[y][x] == '>', edgesHorizontal[y][x], True))
            else:
                require(flow[y][x] != '>')

    # Require each cell connected to a root
    connected = utils.makeGrid(self.cols, self.rows, lambda: Atom())
    for y in range(self.rows):
        for x in range(self.cols):
            require(connected[y][x])
            connected[y][x].prove_if(flow[y][x] == '.')
            if y - 1 >= 0:
                connected[y][x].prove_if(
                    (flow[y][x] == '^') & connected[y - 1][x])
            if y + 1 < self.rows:
                connected[y][x].prove_if(
                    (flow[y][x] == 'v') & connected[y + 1][x])
            if x - 1 >= 0:
                connected[y][x].prove_if(
                    (flow[y][x] == '<') & connected[y][x - 1])
            if x + 1 < self.cols:
                connected[y][x].prove_if(
                    (flow[y][x] == '>') & connected[y][x + 1])

    # Count each spanning tree and make sure the cell numbers match
    counts = utils.makeGrid(self.cols, self.rows, lambda: IntVar())
    for y in range(self.rows):
        for x in range(self.cols):
            # The cell itself plus the subtree of every neighbour flowing in.
            count = 1
            if y - 1 >= 0:
                count += cond((flow[y - 1][x] == 'v'), counts[y - 1][x], 0)
            if y + 1 < self.rows:
                count += cond((flow[y + 1][x] == '^'), counts[y + 1][x], 0)
            if x - 1 >= 0:
                count += cond((flow[y][x - 1] == '>'), counts[y][x - 1], 0)
            if x + 1 < self.cols:
                count += cond((flow[y][x + 1] == '<'), counts[y][x + 1], 0)
            require(counts[y][x] == count)
            require(~((flow[y][x] == '.') ^ (counts[y][x] == ans[y][x])))

    # Ensure different groups with the same number don't touch
    rootIdxs = [[IntVar(0, y * self.cols + x) for x in range(self.cols)]
                for y in range(self.rows)]
    for y in range(self.rows):
        for x in range(self.cols):
            require(~((flow[y][x] == '.')
                      ^ (rootIdxs[y][x] == y * self.cols + x)))
            if y - 1 >= 0:
                require(~((ans[y][x] == ans[y - 1][x])
                          ^ (rootIdxs[y][x] == rootIdxs[y - 1][x])))
            if y + 1 < self.rows:
                require(~((ans[y][x] == ans[y + 1][x])
                          ^ (rootIdxs[y][x] == rootIdxs[y + 1][x])))
            if x - 1 >= 0:
                require(~((ans[y][x] == ans[y][x - 1])
                          ^ (rootIdxs[y][x] == rootIdxs[y][x - 1])))
            if x + 1 < self.cols:
                require(~((ans[y][x] == ans[y][x + 1])
                          ^ (rootIdxs[y][x] == rootIdxs[y][x + 1])))

    num_solutions = solve(quiet=True)
    # Row-major flattening; avoids `i / self.cols`, which yields a float (and
    # an invalid index) under true division.
    solution = [
        utils.intify(ans[y][x])
        for y in range(self.rows)
        for x in range(self.cols)
    ]
    return (num_solutions, solution)
def _normalize(range_, monthly):
    """Normalize a (low, high) range: multiply by 12 months or by $1k.

    Each bound is converted with ``intify`` and scaled by 12 when ``monthly``
    is true, otherwise by 1000.
    """
    if monthly:
        factor = 12
    else:
        factor = 1000
    low, high = [intify(bound) * factor for bound in range_]
    return low, high
def _samples(soup):
    """Return the integer from the first ``p.rowCounts`` element.

    Reads the text of the first ``tt`` inside that paragraph and converts it
    with ``intify``.
    """
    counts_paragraph = soup.findAll('p', {'class': 'rowCounts'})[0]
    raw_samples = counts_paragraph.findAll('tt')[0].text
    return intify(raw_samples)
def bucket_index(self, target):
    """Return the ``bisect_left`` position of ``intify(target)`` in ``self.buckets``."""
    numeric_target = intify(target)
    position = bisect_left(self.buckets, numeric_target)
    return position
def get_seqfile_info(fname,
                     is_data,
                     germline_seqs=None,
                     cyst_positions=None,
                     tryp_positions=None,
                     n_max_queries=-1,
                     queries=None,
                     reco_ids=None):
    """Read sequence records from a csv, tsv, fasta or fastq file.

    The format is chosen by substring match on ``fname`` ('.csv', '.tsv',
    '.fasta'/'.fa', '.fastq'/'.fq').  csv input uses the columns
    'unique_id'/'seq'; tsv input uses 'name'/'nucleotide'.

    Args:
        fname: path of the input file.
        is_data: when false, the germline/position arguments are required and
            every kept line is also stored in ``reco_info`` and passed to
            ``utils.add_match_info``.
        germline_seqs, cyst_positions, tryp_positions: forwarded to
            ``utils.add_match_info``.
        n_max_queries: stop after this many kept records (ignored if <= 0).
        queries: if not None, keep only records whose name is in it.
        reco_ids: if not None, keep only records whose 'reco_id' is in it.

    Returns:
        ``(input_info, reco_info)``, two OrderedDicts keyed by record name.

    Raises:
        AssertionError: unrecognized file format, or no record was kept.
    """
    if not is_data:
        assert germline_seqs is not None
        assert cyst_positions is not None
        assert tryp_positions is not None

    seqfile = None  # only set for csv/tsv input; closed once reading is done
    if '.csv' in fname:
        name_column, seq_column = 'unique_id', 'seq'
        seqfile = opener('r')(fname)
        reader = csv.DictReader(seqfile, delimiter=',')
    elif '.tsv' in fname:
        name_column, seq_column = 'name', 'nucleotide'
        seqfile = opener('r')(fname)
        reader = csv.DictReader(seqfile, delimiter='\t')
    elif ('.fasta' in fname or '.fa' in fname or '.fastq' in fname
          or '.fq' in fname):
        name_column, seq_column = 'unique_id', 'seq'
        # '.fa' is a substring of '.fastq', so the fastq suffixes have to be
        # tested first; otherwise fastq files would be parsed as fasta.
        if '.fastq' in fname or '.fq' in fname:
            ftype = 'fastq'
        else:
            ftype = 'fasta'
        reader = []
        for seq_record in SeqIO.parse(fname, ftype):
            reader.append({
                name_column: seq_record.name,
                seq_column: str(seq_record.seq).upper(),
            })
            if n_max_queries > 0 and len(reader) >= n_max_queries:
                break
    else:
        print('ERROR unrecognized file format %s' % fname)
        # Explicit raise: a bare `assert False` disappears under `python -O`.
        raise AssertionError('unrecognized file format %s' % fname)

    input_info, reco_info = OrderedDict(), OrderedDict()
    n_queries = 0
    try:
        for line in reader:
            utils.intify(line)
            # if command line specified query or reco ids, skip other ones
            if queries is not None and line[name_column] not in queries:
                continue
            if reco_ids is not None and line['reco_id'] not in reco_ids:
                continue

            input_info[line[name_column]] = {
                'unique_id': line[name_column],
                'seq': line[seq_column]
            }
            if not is_data:
                reco_info[line['unique_id']] = line
                utils.add_match_info(germline_seqs, line, cyst_positions,
                                     tryp_positions)
            n_queries += 1
            if n_max_queries > 0 and n_queries >= n_max_queries:
                break
    finally:
        if seqfile is not None:
            seqfile.close()

    if len(input_info) == 0:
        print('ERROR didn\'t end up pulling any input info out of %s' % fname)
        raise AssertionError('no input info in %s' % fname)
    return (input_info, reco_info)
def _solve(self):
    """Encode the bridge-building puzzle and run the solver.

    ``horiAns[y][x]`` / ``vertAns[y][x]`` hold the number of bridges (0..2)
    on the segment leaving cell (x, y) to the right / downwards.  Cells whose
    board value is not None are islands.  Bridges must run unchanged until
    they hit an island, an island's number equals the bridges touching it,
    bridges may not cross on an empty cell, and every island must be
    connected to the first island found in row-major order.

    Returns:
        ``(num_solutions, solution)`` where ``solution`` is a pair of flat,
        row-major lists of ``utils.intify`` results: horizontal segments
        first, then vertical segments.
    """
    set_max_val(3)
    horiAns = utils.makeGrid(self.cols - 1, self.rows, lambda: IntVar(0, 2))
    vertAns = utils.makeGrid(self.cols, self.rows - 1, lambda: IntVar(0, 2))

    # Require bridges extend all the way and don't intersect
    for y in range(self.rows):
        for x in range(self.cols - 1):
            bridge = horiAns[y][x]
            left_ok = (self.board.getCell(x, y) is not None) | (
                horiAns[y][x - 1] == bridge if x - 1 >= 0 else False)
            right_ok = (self.board.getCell(x + 1, y) is not None) | (
                horiAns[y][x + 1] == bridge if x + 1 < self.cols - 1 else False)
            require(cond(bridge > 0, left_ok & right_ok, True))
            require(cond(bridge > 0,
                         (self.board.getCell(x, y) is None)
                         | (self.board.getCell(x + 1, y) is None),
                         True))
    for y in range(self.rows - 1):
        for x in range(self.cols):
            bridge = vertAns[y][x]
            top_ok = (self.board.getCell(x, y) is not None) | (
                vertAns[y - 1][x] == bridge if y - 1 >= 0 else False)
            bottom_ok = (self.board.getCell(x, y + 1) is not None) | (
                vertAns[y + 1][x] == bridge if y + 1 < self.rows - 1 else False)
            require(cond(bridge > 0, top_ok & bottom_ok, True))
            require(cond(bridge > 0,
                         (self.board.getCell(x, y) is None)
                         | (self.board.getCell(x, y + 1) is None),
                         True))

    # Require numbers to match up
    for y in range(self.rows):
        for x in range(self.cols):
            cell = self.board.getCell(x, y)
            connections = [
                horiAns[y][x - 1] if x - 1 >= 0 else 0,
                horiAns[y][x] if x < self.cols - 1 else 0,
                vertAns[y - 1][x] if y - 1 >= 0 else 0,
                vertAns[y][x] if y < self.rows - 1 else 0,
            ]
            if cell is not None:
                require(sum_vars(connections) == cell)

    # Disallow bridge intersections
    for y in range(1, self.rows - 1):
        for x in range(1, self.cols - 1):
            if self.board.getCell(x, y) is None:
                require((horiAns[y][x - 1] == 0) | (horiAns[y][x] == 0)
                        | (vertAns[y - 1][x] == 0) | (vertAns[y][x] == 0))

    # Require all islands to be connected
    isConnected = utils.makeGrid(self.cols, self.rows, lambda: Atom())
    seenFirstIsland = False
    for y in range(self.rows):
        for x in range(self.cols):
            if not self.board.getCell(x, y):
                continue
            require(isConnected[y][x])
            if not seenFirstIsland:
                # The first island is the root of the connectivity proof.
                isConnected[y][x].prove_if(True)
                seenFirstIsland = True

            # `&` binds tighter than `>`, so the comparison has to be
            # parenthesised: connected neighbour AND at least one bridge.

            # Left
            for xx in range(x - 1, -1, -1):
                if self.board.getCell(xx, y):
                    isConnected[y][x].prove_if(
                        isConnected[y][xx] & (horiAns[y][x - 1] > 0))
                    break
            # Right
            for xx in range(x + 1, self.cols):
                if self.board.getCell(xx, y):
                    isConnected[y][x].prove_if(
                        isConnected[y][xx] & (horiAns[y][x] > 0))
                    break
            # Top
            for yy in range(y - 1, -1, -1):
                if self.board.getCell(x, yy):
                    isConnected[y][x].prove_if(
                        isConnected[yy][x] & (vertAns[y - 1][x] > 0))
                    break
            # Down
            for yy in range(y + 1, self.rows):
                if self.board.getCell(x, yy):
                    isConnected[y][x].prove_if(
                        isConnected[yy][x] & (vertAns[y][x] > 0))
                    break

    num_solutions = solve(quiet=True)
    # Row-major flattening; avoids `/`-based index arithmetic, which yields
    # floats (and invalid indexes) under true division.
    solution = [
        [
            utils.intify(horiAns[y][x])
            for y in range(self.rows)
            for x in range(self.cols - 1)
        ],
        [
            utils.intify(vertAns[y][x])
            for y in range(self.rows - 1)
            for x in range(self.cols)
        ],
    ]
    return (num_solutions, solution)