def gen_sam_pair(fpath1, fpath2, flip_1, flip_2):
    ''' pulls info from paired sam file

    The two files are read in lockstep: for every line taken from fpath1
    exactly one line is taken from fpath2, including lines that are then
    skipped as headers.  The i-th line of one file is therefore always
    paired with the i-th line of the other.

    Parameters:
        fpath1, fpath2: paths of the two SAM files to read.
        flip_1, flip_2: when equal to True the orientation computed for the
            corresponding read is inverted (0 <-> 1).

    Yields:
        (ctg1, left1, right1, ctg2, left2, right2, state, tok1, tok2)
        where ctgN is field 3 of the line, leftN is field 4 as an int,
        rightN is leftN plus the length of field 10, state is whatever
        misc.determine_state returns for the two contigs and orientations,
        and tokN is the full whitespace-split token list of the line.

    Raises:
        IndexError: if a non-header line (or its partner, e.g. when fpath2
            is shorter than fpath1) has too few fields.  The offending
            token lists are printed first when either has fewer than two.
    '''
    # context managers guarantee both handles are released even when the
    # consumer abandons the generator early or a malformed line raises.
    with open(fpath1) as fin1:
        with open(fpath2) as fin2:
            for line1 in fin1:
                # always advance the second file so the pairing stays aligned.
                line2 = fin2.readline()

                # sanity: skip header lines (judged on the first file only).
                if line1[0] == "@":
                    continue

                # tokenize.
                tok1 = line1.strip().split()
                tok2 = line2.strip().split()
                if len(tok1) < 2 or len(tok2) < 2:
                    # diagnostic only; the indexing below still raises.
                    print("%s %s" % (tok1, tok2))

                # simplify: a flag field of '0' is orientation 0, else 1.
                orien1 = 0 if tok1[1] == '0' else 1
                orien2 = 0 if tok2[1] == '0' else 1

                # equality with True (not truthiness) is kept on purpose so
                # only True / 1 trigger a flip, as before.
                if flip_1 == True:
                    orien1 = 1 - orien1
                if flip_2 == True:
                    orien2 = 1 - orien2

                ctg1 = tok1[2]
                ctg2 = tok2[2]
                left1 = int(tok1[3])
                left2 = int(tok2[3])
                right1 = left1 + len(tok1[9])
                right2 = left2 + len(tok2[9])

                # NOTE(review): per-pair output on stdout looks like leftover
                # debugging; kept because callers may rely on it -- confirm.
                print("%s %s %s" % (ctg1 < ctg2, orien1, orien2))

                state = misc.determine_state(ctg1, ctg2, orien1, orien2)

                # yield the info.
                yield ctg1, left1, right1, ctg2, left2, right2, state, tok1, tok2
def agp_graph_undirected(fpath):
    ''' returns agp graph

    Builds an undirected networkx graph from the AGP rows returned by
    load_agp(fpath).

    Every row whose 'comp_type' is 'W' becomes a node keyed by its
    'comp_name', with attributes 'width' (the row's 'comp_stop') and
    'orien' (the row's 'comp_orien').

    Every row whose 'comp_type' is 'N' becomes an edge between the
    'comp_name' of the row before it and the row after it, with attributes
    'gap' (the row's 'comp_stop') and 'state' (the value returned by
    misc.determine_state for the two names and their orientations).  An 'N'
    row that is the first or the last row is skipped because it has no
    neighbour on one side.

    Parameters:
        fpath: path handed to load_agp.  Assumes load_agp returns a 1-D
            record array indexable by row and field name -- confirm.

    Returns:
        The populated nx.Graph.

    Raises:
        KeyError: if a row adjacent to an 'N' row is not a 'W' row, since
            no orientation was recorded for it.
    '''
    # load agp array.
    agp_edges = load_agp(fpath)

    # make undirected graph.
    G = nx.Graph()

    # add nodes.  orientations are also kept in a plain dict so the edge
    # pass does not depend on the graph's node-attribute accessor, whose
    # spelling differs between networkx releases.
    orien_of = {}
    for i in range(agp_edges.size):

        # only component ('W') rows become nodes.
        row = agp_edges[i]
        if row['comp_type'] != 'W':
            continue

        # add node info (keyword attributes work on networkx 1.x and 2.x+).
        name = row['comp_name']
        orien = row['comp_orien']
        orien_of[name] = orien
        G.add_node(name, width=row['comp_stop'], orien=orien)

    # add edges.
    last = agp_edges.shape[0] - 1
    for i in range(agp_edges.size):

        # only gap ('N') rows become edges.
        if agp_edges[i]['comp_type'] != 'N':
            continue

        # a gap on the first row would wrap around to the last row through
        # index -1, and a gap on the last row has no following row.
        if i == 0 or i == last:
            continue

        # connect the components on either side of the gap.
        ctg1 = agp_edges[i - 1]['comp_name']
        ctg2 = agp_edges[i + 1]['comp_name']
        o1 = orien_of[ctg1]
        o2 = orien_of[ctg2]
        gap = agp_edges[i]['comp_stop']
        state = misc.determine_state(ctg1, ctg2, o1, o2)
        G.add_edge(ctg1, ctg2, gap=gap, state=state)

    # done.
    return G