def __init__(self, cogs_file_content):
    """Build the joined per-gene spans from a CogsFileContent.

    One NamedSpan is created for every entry returned by
    ``cogs_file_content.get_cogs_lines()``; each span is named with a
    one-element tuple holding that entry's gene. The collected spans are
    handed to ``spanning.join`` and the result is stored on ``self.spans``.

    Args:
        cogs_file_content: the CogsFileContent to read entries from; also
            kept on the instance as ``self.cogs_content``.
    """
    assert isinstance(cogs_file_content, CogsFileContent)
    self.cogs_content = cogs_file_content

    gene_spans = []
    for cog_line in self.cogs_content.get_cogs_lines():
        # Both coordinates are shifted down by one.
        # NOTE(review): presumably a 1-based -> 0-based conversion with an
        # inclusive end; confirm against NamedSpan's end convention.
        gene_spans.append(
            NamedSpan(cog_line.start - 1, cog_line.end - 1, (cog_line.gene,))
        )

    self.spans = spanning.join(gene_spans)
def __init__(self, cogs_file_content, overlap=0):
    """Build the spans lying between the joined coding spans.

    The coding spans are built from ``get_cogs_lines()`` and passed through
    ``spanning.join``. The complement is then assembled in three parts:

    * a leading span from 0 up to the first coding span, named
      ``["", <first span's names>]``;
    * one span per consecutive pair of coding spans, named with the
      names of the left span followed by those of the right span;
    * a trailing span from the last coding span up to ``chr_len``, named
      ``[<last span's names>, ""]``.

    Args:
        cogs_file_content: the CogsFileContent to read entries and
            ``chr_len`` from; also kept as ``self.cogs_content``.
        overlap: amount by which every complement span is pushed into the
            neighbouring coding spans (subtracted from a gap's start,
            added to its end). Stored as ``self.overlap``.

    Raises:
        IndexError: if joining yields no coding spans at all.
    """
    assert isinstance(cogs_file_content, CogsFileContent)
    self.cogs_content = cogs_file_content
    self.overlap = overlap
    chr_len = cogs_file_content.chr_len

    # Collect the coding regions first, then let spanning.join merge them.
    # NOTE(review): start is shifted by -1 while end is left untouched --
    # presumably 1-based inclusive -> 0-based half-open; confirm.
    coding_spans = spanning.join(
        [
            NamedSpan(cog_line.start - 1, cog_line.end, (cog_line.gene,))
            for cog_line in self.cogs_content.get_cogs_lines()
        ]
    )
    n_coding = len(coding_spans)

    # Leading region: from position 0 to the start of the first gene. The
    # empty string marks "no gene on this side".
    first = coding_spans[0]
    gaps = [
        NamedSpan(0, first.start + overlap - 1, [""] + list(first.name))
    ]

    # Interior regions: end of the left gene to start of the right gene,
    # widened by ``overlap`` on both sides.
    gaps.extend(
        NamedSpan(
            coding_spans[idx - 1].end - overlap + 1,
            coding_spans[idx].start + overlap - 1,
            list(coding_spans[idx - 1].name) + list(coding_spans[idx].name),
        )
        for idx in range(1, n_coding)
    )

    # Trailing region: end of the last gene to the end of the chromosome.
    last = coding_spans[n_coding - 1]
    gaps.append(
        NamedSpan(last.end - overlap + 1, chr_len, list(last.name) + [""])
    )

    self.spans = gaps