Ejemplo n.º 1
0
    def from_string(cls, domainlist):
        """
        Build an entry from a single CathDomainList line.

        The line is split on whitespace and must contain exactly 12 columns.
        Column 0 is kept as a string (``domain_id``), columns 1-10 are
        converted with ``int`` and column 11 (``resolution``) with ``float``.

        Args:
            domainlist: one CathDomainList line; leading and trailing
                whitespace is ignored.

        Returns:
            The result of calling ``cls`` with the parsed columns as keyword
            arguments.

        Raises:
            err.ParseError: if the line does not contain exactly 12 columns.
            ValueError: if a numeric column cannot be converted.
        """
        line = domainlist.strip()
        fields = line.split()
        n_fields = len(fields)
        if n_fields != 12:
            message = "expected 12 cols in CathDomainList line '{}' (found {})"
            raise err.ParseError(message.format(line, n_fields))

        # Keyword names for the integer columns, in column order (1..10).
        int_field_names = (
            'class_code',
            'arch_code',
            'top_code',
            'homol_code',
            's35_code',
            's60_code',
            's95_code',
            's100_code',
            'domain_code',
            'atom_length',
        )

        kwargs = {'domain_id': str(fields[0])}
        for field_name, raw_value in zip(int_field_names, fields[1:11]):
            kwargs[field_name] = int(raw_value)
        kwargs['resolution'] = float(fields[11])

        return cls(**kwargs)
Ejemplo n.º 2
0
    def from_string(cls, nameline):
        """
        Build an entry from a single CathNames line.

        The line is split on whitespace into at most three parts: the CATH id,
        the example domain id and the remainder of the line, which must start
        with ``:``. The leading ``:`` is removed from the name.

        Args:
            nameline: one CathNames line; leading and trailing whitespace is
                ignored.

        Returns:
            The result of calling ``cls`` with ``cath_id``,
            ``example_domain_id`` and ``name`` keyword arguments.

        Raises:
            err.ParseError: if the line has fewer than three parts, or if the
                third part does not start with ``:``.
        """
        line = nameline.strip()
        parts = line.split(None, 2)
        n_parts = len(parts)
        if n_parts != 3:
            message = "expected 3 cols in CathNames line '{}' (found {})"
            raise err.ParseError(message.format(line, n_parts))

        cath_id, example_domain_id, raw_name = parts
        if raw_name.startswith(':'):
            # Drop the ':' marker that introduces the free-text name.
            return cls(cath_id=str(cath_id),
                       example_domain_id=str(example_domain_id),
                       name=str(raw_name[1:]))

        raise err.ParseError(
            "expected name '{}' to start with ':'".format(line))
Ejemplo n.º 3
0
    def run_fasta(self, fasta_file):
        """
        Run scorecons on the provided FASTA file and return the parsed result.

        The executable at ``self.scorecons_path`` is started with the
        arguments ``-a <fasta_file> -m <self.matrix_path> -o <temp file>``.
        The DOPS score is read from the ``DOPS score:`` line of the process's
        STDOUT, and the temporary ``-o`` file is passed to
        ``split_scorecons_file`` to obtain the scores.

        Args:
            fasta_file: path of the FASTA file handed to scorecons.

        Returns:
            ScoreconsResult: scorecons result

        Raises:
            FileNotFoundError: if the scorecons process could not be started
                or communicated with; the underlying error is chained as the
                cause.
            err.ParseError: if no DOPS score is found in the STDOUT output.
        """

        # The context manager closes (and therefore deletes) the temporary
        # file on every exit path instead of leaving it to garbage collection.
        # It must stay open until split_scorecons_file has read it.
        with tempfile.NamedTemporaryFile(mode='w+',
                                         suffix='.scorecons',
                                         delete=True) as tmp_scorecons_file:

            scorecons_args = (self.scorecons_path, '-a', fasta_file, '-m',
                              self.matrix_path, '-o', tmp_scorecons_file.name)
            scorecons_cmd = " ".join(scorecons_args)

            LOG.debug("running scorecons: sys: %s", scorecons_cmd)

            try:
                p = Popen(scorecons_args,
                          stdout=PIPE,
                          stderr=PIPE,
                          universal_newlines=True)
                scorecons_out, scorecons_err = p.communicate()
            except Exception as e:
                # Keep the exception type callers already see, but chain the
                # real cause and let KeyboardInterrupt/SystemExit propagate.
                raise FileNotFoundError(
                    "Encountered error running scorecons: `{}`".format(
                        scorecons_cmd)) from e

            # Popen/communicate never raise CalledProcessError, so a failed
            # run is only visible through the exit status. It is reported but
            # not treated as fatal: the DOPS check below decides whether the
            # output is usable.
            if p.returncode != 0:
                LOG.warning(
                    'scorecons exited with code %s: CMD: %s STDERR: "%s"',
                    p.returncode, scorecons_cmd, scorecons_err)

            match = re.search(r'^DOPS score:\s+([0-9.]+)',
                              scorecons_out,
                              flags=re.MULTILINE)
            if not match:
                raise err.ParseError(
                    'Failed to find DOPS score in scorecons output: {} (STDERR: {})'
                    .format(scorecons_out, scorecons_err))

            dops_score = float(match.group(1))

            sc_numbers = self.__class__.split_scorecons_file(
                tmp_scorecons_file.name)

        return ScoreconsResult(dops=dops_score, scores=sc_numbers)
Ejemplo n.º 4
0
    def from_string(cls, domall_line):
        """
        Create a new instance from a Domall string

        The line is split on whitespace and read in this order:

        * chain id, domain count and fragment count (the first character of
          each count column, e.g. the ``D`` in ``D02``, is skipped);
        * for every domain: a segment count, then six columns per segment
          (start chain, start PDB number, start insert code, end chain,
          end PDB number, end insert code);
        * for every fragment: the same six columns followed by the fragment
          length in parentheses, e.g. ``(1)``.

        An insert code of ``-`` is passed on as ``None`` for both segments
        and fragments. PDB numbers and the fragment length are handed to
        ``Segment`` / ``Fragment`` as the strings found in the line.

        Usage:

        ::

            domall_str = '10gsA D02 F01  2  A    2 - A   78 -  A  187 - A  208 -  1  A   79 - A  186 -  A  209 - A  209 - (1)'
            domall = Domall.from_string(domall_str)

            domall.domains[0].segments[0].chain_code    # 'A'
            domall.domains[0].segments[0].start_pdb     # 2
            domall.domains[0].segments[0].start_insert  # None
            domall.domains[0].segments[0].end_pdb       # 78
            domall.domains[0].segments[0].end_insert    # None

        NOTE(review): the integer values shown for ``start_pdb`` / ``end_pdb``
        assume ``Segment`` converts the strings it is given -- confirm.

        Args:
            domall_line: one Domall line; leading and trailing whitespace is
                ignored.

        Returns:
            The result of calling ``cls`` with ``chain_id``, ``domains`` and
            ``fragments`` keyword arguments.

        Raises:
            err.ParseError: if a fragment length column is not of the form
                ``(<digits>)``, or if columns are left over once all domains
                and fragments have been read.
            ValueError: if the line has too few columns for the declared
                counts, or a count is not an integer.
            IndexError: if a segment count column is missing.
        """
        domall_line = domall_line.strip()
        cols = domall_line.split()
        chain_id, dom_count, frag_count = cols[0:3]

        # Counts look like 'D02' / 'F01': drop the one-letter prefix.
        dom_count = int(dom_count[1:])
        frag_count = int(frag_count[1:])

        domains = []
        fragments = []

        # Cursor into cols; the first three columns are already consumed.
        idx = 3
        for _ in range(dom_count):
            seg_count = int(cols[idx])
            idx += 1
            segments = []
            for _ in range(seg_count):
                # The end chain column is read but not used: the segment is
                # built with the start chain only.
                start_chain, start_pdb, start_ins, _end_chain, end_pdb, end_ins = cols[idx:idx+6]
                idx += 6
                segments.append(Segment(
                    chain_code=start_chain,
                    start_pdb=start_pdb,
                    start_insert=None if start_ins == '-' else start_ins,
                    end_pdb=end_pdb,
                    end_insert=None if end_ins == '-' else end_ins))
            domains.append(Domain(segments=segments))

        for _ in range(frag_count):
            start_chain, start_pdb, start_ins, _end_chain, end_pdb, end_ins, frag_len = cols[
                idx:idx+7]
            # Advance before validating so the reported idx matches the
            # position after this fragment's columns.
            idx += 7
            frag_len_match = re.match(r'^\((\d+)\)', frag_len)
            if not frag_len_match:
                raise err.ParseError(
                    'failed to parse frag len from "{}": {} (idx={})'.format(frag_len, domall_line, idx))
            # Treat '-' insert codes exactly as the segment loop does, so
            # fragments and segments agree on "no insert code".
            fragments.append(Fragment(
                chain_code=start_chain,
                start_pdb=start_pdb,
                start_insert=None if start_ins == '-' else start_ins,
                end_pdb=end_pdb,
                end_insert=None if end_ins == '-' else end_ins,
                atom_length=frag_len_match.group(1)))

        # Every column must have been consumed by the declared counts.
        if idx != len(cols):
            raise err.ParseError('col index is {}, but there are {} columns: {}'.format(
                idx, len(cols), domall_line,
            ))

        return cls(
            chain_id=str(chain_id),
            domains=domains,
            fragments=fragments,
        )