Exemple #1
0
 def read_pe_values(lines):
     """Read all combined p-values and e-values.

     Visits every line index reported by ``next_pe_value_line`` and collects
     one entry per hit.

     Args:
         lines: indexable sequence of text lines to scan.

     Returns:
         A list of ``(gene, pvalue, evalue)`` tuples. ``gene`` is the
         stripped text found two lines above the hit; ``pvalue`` and
         ``evalue`` are floats parsed from the hit line itself.
     """
     result = []
     current_index = next_pe_value_line(0, lines)
     # the scan stops as soon as next_pe_value_line returns -1
     while current_index != -1:
         # the gene label is taken from two lines above the value line
         gene = lines[current_index - 2].strip()
         line = lines[current_index]
         # raw strings: '\s' and '\S' are regex classes, not str escapes
         pvalue = float(util.extract_regex(r'P-VALUE\s+=\s+(\S+)', line))
         evalue = float(util.extract_regex(r'E-VALUE\s+=\s+(\S+)', line))
         result.append((gene, pvalue, evalue))
         current_index = next_pe_value_line(current_index + 1, lines)
     return result
Exemple #2
0
 def read_pe_values(lines):
     """Read all combined p-values and e-values.

     Visits every line index reported by ``next_pe_value_line`` and collects
     one entry per hit.

     Args:
         lines: indexable sequence of text lines to scan.

     Returns:
         A list of ``(gene, pvalue, evalue)`` tuples. ``gene`` is the
         stripped text found two lines above the hit; ``pvalue`` and
         ``evalue`` are floats parsed from the hit line itself.
     """
     result = []
     current_index = next_pe_value_line(0, lines)
     # the scan stops as soon as next_pe_value_line returns -1
     while current_index != -1:
         # the gene label is taken from two lines above the value line
         gene = lines[current_index - 2].strip()
         line = lines[current_index]
         # raw strings: '\s' and '\S' are regex classes, not str escapes
         pvalue = float(util.extract_regex(r'P-VALUE\s+=\s+(\S+)', line))
         evalue = float(util.extract_regex(r'E-VALUE\s+=\s+(\S+)', line))
         result.append((gene, pvalue, evalue))
         current_index = next_pe_value_line(current_index + 1, lines)
     return result
Exemple #3
0
    def read_annotations(lines, genes):
        """Extract annotations; genes are given as refseq ids.

        For every line index reported by ``next_pe_value_line`` whose gene
        (the stripped text two lines above) is contained in ``genes``, the
        sequence length is read from that info line. Unless the line right
        after it is a DIAGRAM line holding only that same length, the
        alignment blocks are parsed with ``read_seqalign_blocks``.

        Args:
            lines: indexable sequence of text lines to scan.
            genes: container of gene identifiers to keep (tested with ``in``).

        Returns:
            A dict mapping each selected gene that has an alignment block to
            the value returned by ``read_seqalign_blocks``.
        """
        result = {}
        current_index = next_pe_value_line(0, lines)
        # the scan stops as soon as next_pe_value_line returns -1
        while current_index != -1:
            gene = lines[current_index - 2].strip()
            if gene in genes:
                info_line = lines[current_index]
                # raw strings: '\s' and '\d' are regex classes, not str escapes
                length = int(
                    util.extract_regex(r'LENGTH\s+=\s+(\d+)', info_line))
                diagram_match = re.match(r'^\s+DIAGRAM:\s+(\d+)$',
                                         lines[current_index + 1])
                # a diagram made up of just the sequence length means there
                # is no alignment block to read for this gene
                has_seqalign_block = not (
                    diagram_match is not None
                    and int(diagram_match.group(1)) == length)

                if has_seqalign_block:
                    # the diagram line can span several lines and the blank
                    # line after those can span several, so search for the
                    # first non-blank line after the block of blank lines
                    blank_index = current_index + 2
                    while lines[blank_index].strip():
                        blank_index += 1
                    non_blank_index = blank_index + 1
                    while not lines[non_blank_index].strip():
                        non_blank_index += 1
                    result[gene] = read_seqalign_blocks(
                        lines, non_blank_index, length)

            current_index = next_pe_value_line(current_index + 1, lines)
        return result
Exemple #4
0
    def read_annotations(lines, genes):
        """Extract annotations; genes are given as refseq ids.

        For every line index reported by ``next_pe_value_line`` whose gene
        (the stripped text two lines above) is contained in ``genes``, the
        sequence length is read from that info line. Unless the line right
        after it is a DIAGRAM line holding only that same length, the
        alignment blocks are parsed with ``read_seqalign_blocks``.

        Args:
            lines: indexable sequence of text lines to scan.
            genes: container of gene identifiers to keep (tested with ``in``).

        Returns:
            A dict mapping each selected gene that has an alignment block to
            the value returned by ``read_seqalign_blocks``.
        """
        result = {}
        current_index = next_pe_value_line(0, lines)
        # the scan stops as soon as next_pe_value_line returns -1
        while current_index != -1:
            gene = lines[current_index - 2].strip()
            if gene in genes:
                info_line = lines[current_index]
                # raw strings: '\s' and '\d' are regex classes, not str escapes
                length = int(util.extract_regex(r'LENGTH\s+=\s+(\d+)',
                                                info_line))
                diagram_match = re.match(r'^\s+DIAGRAM:\s+(\d+)$',
                                         lines[current_index + 1])
                # a diagram made up of just the sequence length means there
                # is no alignment block to read for this gene
                has_seqalign_block = not (
                    diagram_match is not None
                    and int(diagram_match.group(1)) == length)

                if has_seqalign_block:
                    # the diagram line can span several lines and the blank
                    # line after those can span several, so search for the
                    # first non-blank line after the block of blank lines
                    blank_index = current_index + 2
                    while lines[blank_index].strip():
                        blank_index += 1
                    non_blank_index = blank_index + 1
                    while not lines[non_blank_index].strip():
                        non_blank_index += 1
                    result[gene] = read_seqalign_blocks(lines,
                                                        non_blank_index,
                                                        length)

            current_index = next_pe_value_line(current_index + 1, lines)
        return result
Exemple #5
0
 def extract_evalue(infoline):
     """Extract the e-value from the info line.

     Args:
         infoline: text line searched for an ``E-value = ...`` field.

     Returns:
         The result of ``util.extract_regex`` converted to ``float``.
     """
     # raw string: '\s' and '\S' are regex classes, not valid str escapes
     return float(util.extract_regex(r'E-value =\s+\S+', infoline))
Exemple #6
0
 def extract_llr(infoline):
     """Extract the llr value from the info line.

     Args:
         infoline: text line searched for an ``llr = ...`` field.

     Returns:
         The result of ``util.extract_regex`` converted to ``int``.
     """
     # raw string: '\s' and '\d' are regex classes, not valid str escapes
     return int(util.extract_regex(r'llr =\s+\d+', infoline))
Exemple #7
0
 def extract_num_sites(infoline):
     """Extract the sites value from the info line.

     Args:
         infoline: text line searched for a ``sites = ...`` field.

     Returns:
         The result of ``util.extract_regex`` converted to ``int``.
     """
     # raw string: '\s' and '\d' are regex classes, not valid str escapes
     return int(util.extract_regex(r'sites =\s+\d+', infoline))
Exemple #8
0
 def extract_width(infoline):
     """Extract the width value from the info line.

     Args:
         infoline: text line searched for a ``width = ...`` field.

     Returns:
         The result of ``util.extract_regex`` converted to ``int``.
     """
     # raw string: '\s' and '\d' are regex classes, not valid str escapes
     return int(util.extract_regex(r'width =\s+\d+', infoline))
Exemple #9
0
 def extract_evalue(infoline):
     """Extract the e-value from the info line.

     Args:
         infoline: text line searched for an ``E-value = ...`` field.

     Returns:
         The result of ``util.extract_regex`` converted to ``float``.
     """
     # raw string: '\s' and '\S' are regex classes, not valid str escapes
     return float(util.extract_regex(r'E-value =\s+\S+', infoline))
Exemple #10
0
 def extract_llr(infoline):
     """Extract the llr value from the info line.

     Args:
         infoline: text line searched for an ``llr = ...`` field.

     Returns:
         The result of ``util.extract_regex`` converted to ``int``.
     """
     # raw string: '\s' and '\d' are regex classes, not valid str escapes
     return int(util.extract_regex(r'llr =\s+\d+', infoline))
Exemple #11
0
 def extract_num_sites(infoline):
     """Extract the sites value from the info line.

     Args:
         infoline: text line searched for a ``sites = ...`` field.

     Returns:
         The result of ``util.extract_regex`` converted to ``int``.
     """
     # raw string: '\s' and '\d' are regex classes, not valid str escapes
     return int(util.extract_regex(r'sites =\s+\d+', infoline))
Exemple #12
0
 def extract_width(infoline):
     """Extract the width value from the info line.

     Args:
         infoline: text line searched for a ``width = ...`` field.

     Returns:
         The result of ``util.extract_regex`` converted to ``int``.
     """
     # raw string: '\s' and '\d' are regex classes, not valid str escapes
     return int(util.extract_regex(r'width =\s+\d+', infoline))