예제 #1
0
파일: spqr_flow.py 프로젝트: jim-bo/SINAH
def load_previous(agp_file, nodes):
	''' Loads the gaps between consecutive contigs of a previous AGP.

	The AGP rows are sorted by scaffold name and scaffold index, rows whose
	comp_type is not "W" are ignored, and for every pair of contigs that
	follow each other on the same scaffold the value
	scaf_start(second) - scaf_stop(first) is recorded.

	Parameters:
		agp_file: handed unchanged to load_agps.
		nodes: handed unchanged to create_lookup; the result is indexed
			by component name to obtain a node index.

	Returns:
		dict mapping (idxa, idxb) node-index pairs to the gap between the
		two contigs. Only the gaps are returned.

	Raises:
		Whatever the lookup raises for a component name it does not know
		(KeyError if the lookup is a plain dict -- TODO confirm).
	'''

	# create node lookup.
	lookup = create_lookup(nodes)

	# load the agp array.
	agp_edges = load_agps(agp_file)

	# ensure sorted by scaffname and scafidx.
	agp_edges.sort(order=['scaf_name','scaf_idx'])

	# group contig rows per scaffold, resolving every node index exactly
	# once so an unknown component name still fails on the row it occurs.
	contigs = dict()
	for i in range(agp_edges.size):
		row = agp_edges[i]

		# skip non contigs.
		if row['comp_type'] != "W": continue

		# remember (row position, node index) in scaffold order.
		idx = lookup[row['comp_name']]
		contigs.setdefault(row['scaf_name'], list()).append((i, idx))

	# measure the gap between each pair of neighbouring contigs.
	gaps = dict()
	for members in contigs.values():
		for (rowa, idxa), (rowb, idxb) in zip(members, members[1:]):
			gaps[(idxa,idxb)] = agp_edges[rowb]['scaf_start'] - agp_edges[rowa]['scaf_stop']

	# return the gaps keyed by node-index pair.
	return gaps
예제 #2
0
파일: spqr_flow.py 프로젝트: jim-bo/SINAH
def call_agp_gaps(agp_file, nodes):
	''' Calls the gaps between consecutive contigs listed in an AGP.

	The AGP rows are sorted by scaffold name and scaffold index, rows whose
	comp_type is not "W" are ignored, and for every pair of contigs that
	follow each other on the same scaffold the value
	scaf_start(second) - scaf_stop(first) is recorded.

	Parameters:
		agp_file: handed unchanged to load_agps.
		nodes: handed unchanged to create_lookup; the result is indexed
			by component name to obtain a node index.

	Returns:
		dict mapping (idxa, idxb) node-index pairs to the gap between the
		two contigs. Empty when the AGP holds no contig rows.
	'''

	# create node lookup.
	lookup = create_lookup(nodes)

	# load the agp array.
	agp_edges = load_agps(agp_file)

	# ensure sorted by scaffname and scafidx.
	agp_edges.sort(order=['scaf_name','scaf_idx'])

	# build list of component offsets.
	offsets = dict()
	for i in range(agp_edges.size):

		# skip non contigs.
		if agp_edges[i]['comp_type'] != "W": continue

		# record index.
		offsets.setdefault(agp_edges[i]['scaf_name'], list()).append(i)

	# compute the gaps once, after all offsets are known. Doing this inside
	# the row loop rebuilt the whole dict for every contig row and left
	# gaps undefined when there were no contig rows at all.
	gaps = dict()
	for rows in offsets.values():

		# loop over neighbouring contig rows.
		for rowa, rowb in zip(rows, rows[1:]):

			# get AGP edge.
			ea = agp_edges[rowa]
			eb = agp_edges[rowb]

			# get index.
			idxa = lookup[ea['comp_name']]
			idxb = lookup[eb['comp_name']]

			# get gap.
			gaps[(idxa,idxb)] = eb['scaf_start'] - ea['scaf_stop']

	return gaps
예제 #3
0
파일: solution.py 프로젝트: jim-bo/SINAH
	def apply_agp(self, bundles, agp_file):
		''' Seeds the solution from an AGP file and returns its gaps.

		For every contig row (comp_type "W") the node index and orientation
		are written into self._sol_nodes and the index is added to
		self._nodes_added. One bundle entry with X = 1 is appended to
		self._sol_bundles per pair of neighbouring contigs on a scaffold,
		and the S/A/B/C/D fields of the whole bundle array are set to -1.

		Parameters:
			bundles: not used by this method.
			agp_file: handed unchanged to load_agps.

		Returns:
			dict mapping (idxa, idxb) node-index pairs to
			scaf_start(second) - scaf_stop(first).

		Logs an error and exits with status 1 if self._sol_added > 0.
		'''

		# refuse to run once solutions have been added.
		if self._sol_added > 0:
			logging.error("can't apply AGP after solutions added")
			sys.exit(1)

		# read the AGP rows and order them by scaffold, then position.
		agp_edges = load_agps(agp_file)
		agp_edges.sort(order=['scaf_name','scaf_idx'])

		# one pass over the contig rows: store the orientation solution
		# and collect the row positions belonging to each scaffold.
		by_scaffold = dict()
		for pos in range(agp_edges.size):
			row = agp_edges[pos]

			# only contigs take part.
			if row['comp_type'] != "W":
				continue

			# node orientation.
			node = self._nindex(row['comp_name'])
			self._sol_nodes[node]['idx'] = node
			self._sol_nodes[node]['orien'] = row['comp_orien']
			self._nodes_added.add(node)

			# scaffold membership.
			by_scaffold.setdefault(row['scaf_name'], list()).append(pos)

		# make room for one bundle per neighbouring contig pair.
		idxbun = self._sol_bundles.size
		to_grow = sum(len(rows) - 1 for rows in by_scaffold.values())
		self._sol_bundles.resize(idxbun + to_grow)

		# fill in the new bundles and collect the gaps.
		gaps = dict()
		for rows in by_scaffold.values():
			for rowa, rowb in zip(rows, rows[1:]):

				# the two AGP rows and their node indices.
				ea = agp_edges[rowa]
				eb = agp_edges[rowb]
				idxa = self._nindex(ea['comp_name'])
				idxb = self._nindex(eb['comp_name'])

				# distance between the pair.
				gaps[(idxa,idxb)] = eb['scaf_start'] - ea['scaf_stop']

				# bundle entry for the pair.
				self._sol_bundles[idxbun]['idxa'] = idxa
				self._sol_bundles[idxbun]['idxb'] = idxb
				self._sol_bundles[idxbun]['X'] = 1
				idxbun += 1

		# default the state variables on every bundle.
		for field in ('S', 'A', 'B', 'C', 'D'):
			self._sol_bundles[:][field] = -1

		# hand back the gap estimates.
		return gaps
예제 #4
0
파일: high_bundle.py 프로젝트: jim-bo/SINAH
        subprocess.call(["mkdir",file_path])

def make_key(a, b):
    ''' Returns a and b as a tuple with the smaller value first.

    When a is not strictly less than b (including a == b) the result
    is (b, a).
    '''
    return (a, b) if a < b else (b, a)

########### script ################## 

# load the node, edge and AGP arrays from the input files.
# NOTE(review): the input_*_file names are not defined in this part of the
# script -- presumably set earlier; the original comment calls the data hdf5.
logging.info("loading data arrays")
nodes = load_nodes(input_nodes_file)
edges = load_edges(input_edges_file)
agps = load_agps(input_agp_file)

# lookup built from the node array.
nlookup = create_lookup(nodes)

# build bundle count.
# blookup is keyed by the sorted contig-index pair produced by make_key.
logging.info("counting bundles")
blookup = dict()
for i in range(edges.size):

    # get id
    idxa = edges[i]['ctg_a_idx']
    idxb = edges[i]['ctg_b_idx']
    key = make_key(idxa, idxb)
    
    # count it.
    if key not in blookup: