def get_merge_gene(fh, header=[None]):
    """Merge the next group of lines from `fh` into one spanning DagLine.

    A group is a header line followed by every line up to (but not
    including) the next line that starts with "#". Each line of the group
    is parsed with `DagLine`; the merged record covers the minimum start
    and maximum end of the group on both the `a` and `b` sides.

    Parameters:
        fh: an open file-like object supporting `readline()`.
        header: one-element list holding the header line of the group that
            is about to be read. The mutable default is intentional: it
            keeps the look-ahead header between calls. Callers may pass
            their own `[None]` list to keep independent state.

    Returns:
        `(DagLine, header_string)` for the merged group, or `(None, None)`
        when no data lines follow the current header (normally end of file).
    """
    if header[0] is None:
        header[0] = fh.readline()

    genes = []
    line = fh.readline()
    while line and line[0] != "#":
        genes.append(DagLine(line))
        line = fh.readline()

    if not genes:
        # Keep the look-ahead header if one was just read; at end of file
        # reset to None so the shared default does not leak stale state
        # into a later call made with a different file handle.
        header[0] = line or None
        return None, None

    header_string = parse_pyheader(header[0], asstring=True)
    # The line that ended this group is the header of the next group.
    header[0] = line

    # The header string is joined with JS; an "r" field marks a reversed
    # group.
    reverse = JS + "r" + JS in header_string

    a_start = min(g.a_start for g in genes)
    a_end = max(g.a_end for g in genes)
    b_start = min(g.b_start for g in genes)
    b_end = max(g.b_end for g in genes)
    if reverse:
        b_start, b_end = b_end, b_start

    d = {
        'a_seqid': genes[0].a_seqid,
        'b_seqid': genes[0].b_seqid,
        'a_accn': 'a' + header_string,
        'b_accn': 'b' + header_string,
        'a_start': a_start,
        'b_start': b_start,
        'a_end': a_end,
        'b_end': b_end,
        'evalue': 1e-250,
    }
    return DagLine.from_dict(d), header_string
def get_dag_line(fh):
    """Read the next data line from `fh` and return it with its header.

    Lines starting with "#" are header lines: each one is parsed with
    `parse_pyheader(..., asstring=True)` and remembered on the function
    attribute `get_dag_line.header`, so it is returned with every following
    data line until the next header replaces it.

    Parameters:
        fh: an open file-like object supporting `readline()`.

    Returns:
        `(DagLine, header)` for the next data line, or `(None, None)` at
        end of file. `header` is None if no header line has been seen yet.
    """
    line = fh.readline()
    # Loop rather than a single `if` so that consecutive header lines are
    # all consumed and a header is never handed to DagLine.
    while line and line[0] == "#":
        get_dag_line.header = parse_pyheader(line, asstring=True)
        line = fh.readline()

    # End of file, including the case where a header was the last line.
    if not line:
        return None, None

    # getattr guards against a data line that appears before any header.
    return DagLine(line), getattr(get_dag_line, "header", None)
def matches_by_diag_id(matches):
    """Group the pairs in `matches` by their diagonal id.

    `matches` is the structure returned by parse_file: a mapping of
    seqid-pair -> mapping of accn-pair -> pair dict. Every pair dict must
    have a non-None 'diag_str' entry.

    Returns a plain dict whose keys are the 'diag_str' values and whose
    values are lists of DagLine objects built with
    `DagLine.from_pair_dict`.
    """
    by_diag = collections.defaultdict(list)
    # Only the values are needed; .values() also works on both Python 2
    # and Python 3, unlike .iteritems().
    for accn_pair_dict in matches.values():
        for accn_pair in accn_pair_dict.values():
            assert accn_pair['diag_str'] is not None
            by_diag[accn_pair['diag_str']].append(
                DagLine.from_pair_dict(accn_pair))
    return dict(by_diag)
def matches_by_diag_id(matches):
    """Group the pairs in `matches` by their diagonal id.

    `matches` is the structure returned by parse_file: a mapping of
    seqid-pair -> mapping of accn-pair -> pair dict. Every pair dict must
    have a non-None 'diag_str' entry.

    Returns a plain dict whose keys are the 'diag_str' values and whose
    values are lists of DagLine objects built with
    `DagLine.from_pair_dict`.
    """
    by_diag = collections.defaultdict(list)
    # Only the values are needed; .values() also works on both Python 2
    # and Python 3, unlike .iteritems().
    for accn_pair_dict in matches.values():
        for accn_pair in accn_pair_dict.values():
            assert accn_pair['diag_str'] is not None
            by_diag[accn_pair['diag_str']].append(
                DagLine.from_pair_dict(accn_pair))
    return dict(by_diag)
def get_merge_gene(fh, header=[None]):
    """Merge the next group of lines from `fh` into one spanning DagLine.

    A group is a header line followed by every line up to (but not
    including) the next line that starts with "#". Each line of the group
    is parsed with `DagLine`; the merged record covers the minimum start
    and maximum end of the group on both the `a` and `b` sides.

    Parameters:
        fh: an open file-like object supporting `readline()`.
        header: one-element list holding the header line of the group that
            is about to be read. The mutable default is intentional: it
            keeps the look-ahead header between calls. Callers may pass
            their own `[None]` list to keep independent state.

    Returns:
        `(DagLine, header_string)` for the merged group, or `(None, None)`
        when no data lines follow the current header (normally end of file).
    """
    if header[0] is None:
        header[0] = fh.readline()

    genes = []
    line = fh.readline()
    while line and line[0] != "#":
        genes.append(DagLine(line))
        line = fh.readline()

    if not genes:
        # Keep the look-ahead header if one was just read; at end of file
        # reset to None so the shared default does not leak stale state
        # into a later call made with a different file handle.
        header[0] = line or None
        return None, None

    header_string = parse_pyheader(header[0], asstring=True)
    # The line that ended this group is the header of the next group.
    header[0] = line

    # The header string is joined with JS; an "r" field marks a reversed
    # group.
    reverse = JS + "r" + JS in header_string

    a_start = min(g.a_start for g in genes)
    a_end = max(g.a_end for g in genes)
    b_start = min(g.b_start for g in genes)
    b_end = max(g.b_end for g in genes)
    if reverse:
        b_start, b_end = b_end, b_start

    d = {
        'a_seqid': genes[0].a_seqid,
        'b_seqid': genes[0].b_seqid,
        'a_accn': 'a' + header_string,
        'b_accn': 'b' + header_string,
        'a_start': a_start,
        'b_start': b_start,
        'a_end': a_end,
        'b_end': b_end,
        'evalue': 1e-250,
    }
    return DagLine.from_dict(d), header_string