def load_previous(agp_file, nodes):
    ''' loads the gaps between adjacent contigs from a previous AGP.

    agp_file: passed to load_agps to obtain the AGP record array.
    nodes:    passed to create_lookup; the resulting lookup is indexed by
              component name and yields the node index.

    returns a dict mapping (idxa, idxb) -> gap, where idxa and idxb are the
    node indices of two contigs that are consecutive within one scaffold
    (after sorting by scaf_name, scaf_idx) and gap is the scaf_start of the
    second minus the scaf_stop of the first.

    raises KeyError (from the lookup) if any contig component name is
    not present in the lookup.
    '''

    # create node lookup.
    lookup = create_lookup(nodes)

    # load the agp array.
    agp_edges = load_agps(agp_file)

    # ensure sorted by scaffname and scafidx.
    agp_edges.sort(order=['scaf_name', 'scaf_idx'])

    # group the contig rows by scaffold, preserving the sorted order.
    chains = dict()
    for row in agp_edges:

        # skip non contigs.
        if row['comp_type'] != "W":
            continue

        # resolve the node index for every contig, including contigs that
        # are alone in their scaffold, so an unknown name fails right here.
        entry = (lookup[row['comp_name']], row['scaf_start'], row['scaf_stop'])
        chains.setdefault(row['scaf_name'], []).append(entry)

    # measure the gap between each pair of neighbouring contigs.
    # NOTE(review): if scaf_start/scaf_stop are 1-based inclusive AGP
    # coordinates this difference is one more than the number of bases
    # between the contigs - confirm against load_agps.
    gaps = dict()
    for chain in chains.values():
        for (idxa, _, stopa), (idxb, startb, _) in zip(chain, chain[1:]):
            gaps[(idxa, idxb)] = startb - stopa

    # only the gaps are returned.
    return gaps
def call_agp_gaps(agp_file, nodes):
    ''' computes the gap between neighbouring contigs described by an AGP.

    agp_file: passed to load_agps to obtain the AGP record array.
    nodes:    passed to create_lookup; the resulting lookup is indexed by
              component name and yields the node index.

    returns a dict mapping (idxa, idxb) -> gap for every two contigs that
    follow each other inside the same scaffold, where gap is the scaf_start
    of the second contig minus the scaf_stop of the first.
    '''

    # component name -> node index.
    name_to_idx = create_lookup(nodes)

    # read the records and order them by scaffold, then by place in scaffold.
    rows = load_agps(agp_file)
    rows.sort(order=['scaf_name', 'scaf_idx'])

    # for each scaffold remember the row numbers of its contig components.
    contig_rows = dict()
    for pos in range(rows.size):
        rec = rows[pos]
        if rec['comp_type'] == "W":
            contig_rows.setdefault(rec['scaf_name'], list()).append(pos)

    # walk consecutive contigs of each scaffold and record their distance.
    gaps = dict()
    for positions in contig_rows.values():
        for left_pos, right_pos in zip(positions[:-1], positions[1:]):
            left = rows[left_pos]
            right = rows[right_pos]
            idx_left = name_to_idx[left['comp_name']]
            idx_right = name_to_idx[right['comp_name']]
            gaps[(idx_left, idx_right)] = right['scaf_start'] - left['scaf_stop']

    return gaps
def apply_agp(self, bundles, agp_file):
    ''' seeds the solution arrays from an AGP file.

    bundles:  not used by this method.
    agp_file: passed to load_agps to obtain the AGP record array.

    for every contig ("W") record the node index is resolved with
    self._nindex, its orientation is written to self._sol_nodes and the
    index is added to self._nodes_added. self._sol_bundles is then grown by
    one entry per pair of consecutive contigs in a scaffold; each new entry
    gets idxa, idxb and X = 1. finally the S, A, B, C and D fields of the
    whole bundle array are set to -1.

    returns a dict mapping (idxa, idxb) -> scaf_start of the second contig
    minus scaf_stop of the first.

    logs an error and calls sys.exit(1) when self._sol_added > 0.
    '''

    # an AGP may only be applied before any solution has been added.
    if self._sol_added > 0:
        logging.error("can't apply AGP after solutions added")
        sys.exit(1)

    # read the records and order them by scaffold, then by place in scaffold.
    rows = load_agps(agp_file)
    rows.sort(order=['scaf_name', 'scaf_idx'])

    # row numbers of the contig components, in sorted order.
    contig_pos = [pos for pos in range(rows.size) if rows[pos]['comp_type'] == "W"]

    # copy each contig's orientation into the node solution.
    for pos in contig_pos:
        rec = rows[pos]
        node = self._nindex(rec['comp_name'])
        self._sol_nodes[node]['idx'] = node
        self._sol_nodes[node]['orien'] = rec['comp_orien']
        self._nodes_added.add(node)

    # collect the contig row numbers of each scaffold.
    per_scaf = dict()
    for pos in contig_pos:
        per_scaf.setdefault(rows[pos]['scaf_name'], list()).append(pos)

    # make room for one bundle per adjacent contig pair.
    cursor = self._sol_bundles.size
    extra = sum(len(members) - 1 for members in per_scaf.values())
    self._sol_bundles.resize(cursor + extra)

    # fill the new bundles and note the gap of each pair.
    gaps = dict()
    for members in per_scaf.values():
        for first, second in zip(members[:-1], members[1:]):
            ea = rows[first]
            eb = rows[second]

            idxa = self._nindex(ea['comp_name'])
            idxb = self._nindex(eb['comp_name'])

            gaps[(idxa, idxb)] = eb['scaf_start'] - ea['scaf_stop']

            self._sol_bundles[cursor]['idxa'] = idxa
            self._sol_bundles[cursor]['idxb'] = idxb
            self._sol_bundles[cursor]['X'] = 1
            cursor += 1

    # default the state variables of every bundle.
    for field in ('S', 'A', 'B', 'C', 'D'):
        self._sol_bundles[:][field] = -1

    # hand back the gap estimates.
    return gaps
subprocess.call(["mkdir",file_path]) def make_key(a, b): ''' makes sorted key''' if a < b: return (a,b) else: return (b,a) ########### script ################## # load hdf5 information. logging.info("loading data arrays") nodes = load_nodes(input_nodes_file) edges = load_edges(input_edges_file) agps = load_agps(input_agp_file) nlookup = create_lookup(nodes) # build bundle count. logging.info("counting bundles") blookup = dict() for i in range(edges.size): # get id idxa = edges[i]['ctg_a_idx'] idxb = edges[i]['ctg_b_idx'] key = make_key(idxa, idxb) # count it. if key not in blookup: