Esempio n. 1
0
class RfamFamily(object):
    id = attr.ib(validator=is_a(str))
    name = attr.ib(validator=is_a(str))
    so_terms = attr.ib(validator=is_a(set))
    rna_type = attr.ib(validator=is_a(tuple))

    @classmethod
    def build_all(cls, link_file, family_file):
        so_terms = coll.defaultdict(set)
        with open(link_file, 'r', 'utf-8') as raw:
            for line in raw:
                parts = line.split()
                if parts[1] != 'SO':
                    continue
                so_terms[parts[0]].add('SO:%s' % parts[2])

        families = []
        with open(family_file, 'r', 'iso-8859-1') as raw:
            for row in csv.reader(raw, delimiter='\t'):
                family = row[0]
                name = row[1]
                rna_type = row[18]
                families.append(cls(
                    id=family,
                    name=name,
                    so_terms=so_terms[family],
                    rna_type=rna_type_to_key(rna_type)
                ))
        return families
Esempio n. 2
0
class WithFallBacks(object):
    from_manual = attr.ib(validator=is_a(ManualInference))
    from_name = attr.ib(validator=is_a(FromName))
    from_rna_type = attr.ib(validator=is_a(FromRnaType))
    from_so_terms = attr.ib(validator=is_a(FromSoTerms))
    so_term_search = attr.ib(validator=is_a(SoTermSearch))

    @classmethod
    def build(cls, manual_file, obo_file, max_depth):
        return cls(
            from_manual=ManualInference.build(manual_file),
            from_name=FromName.build(manual_file),
            from_rna_type=FromRnaType.build(manual_file),
            from_so_terms=FromSoTerms.build(manual_file),
            so_term_search=SoTermSearch.build(manual_file, obo_file, max_depth),
        )

    @property
    def name(self):
        return 'fallbacks'

    def simplify(self, result):
        if not result:
            return result

        # Remove misc_RNA if possible.
        if len(result) > 1 and INSDCTypes.misc_RNA in result:
            result = result.remove(INSDCTypes.misc_RNA)

        # Remove other if possible. We remove misc_RNA first because other is
        # more specific.
        if len(result) > 1 and INSDCTypes.other in result:
            result = result.remove(INSDCTypes.other)

        return result

    def __call__(self, family):
        result = self.from_manual(family) or \
            self.from_name(family) or \
            self.from_so_terms(family) or \
            self.from_rna_type(family)

        if not result:
            possible = self.so_term_search(family)
            if possible and possible.rna_types != {INSDCTypes.other}:
                result = possible

        if not result:
            return InferredRfamType(
                family=family,
                method=self.name,
                rna_types=frozenset()
            )

        return self.simplify(result)
Esempio n. 3
0
class InferredRfamType(object):
    family = attr.ib(validator=is_a(RfamFamily))
    method = attr.ib(validator=is_a(str))
    rna_types = attr.ib(validator=is_a(frozenset))

    @classmethod
    def build(cls, family, name, result):
        rna_types = set()
        if isinstance(result, str):
            rna_types.add(result)
        elif isinstance(result, (list, set, tuple)):
            rna_types.update(result)
        elif result is None:
            pass
        else:
            raise ValueError("Unknown type of result")

        final = set()
        for rna_type in rna_types:
            if rna_type == 'antisense':
                rna_type = 'antisense_RNA'
            if rna_type is None:
                continue
            final.add(getattr(INSDCTypes, rna_type))

        return cls(
            family=family,
            method=name,
            rna_types=frozenset(final),
        )

    def remove(self, value):
        if value not in self.rna_types:
            return self
        return attr.assoc(
            self,
            rna_types=frozenset(r for r in self.rna_types if r != value)
        )

    def simple(self):
        return {
            'family': self.family.id,
            'method': self.method,
            'rna_types': ';'.join(r.name for r in self.rna_types),
        }

    def __contains__(self, value):
        return value in self.rna_types

    def __len__(self):
        return len(self.rna_types)

    def __bool__(self):
        return bool(self.rna_types)
Esempio n. 4
0
class SoTermSearch(object):
    graph = attr.ib()
    max_depth = attr.ib(validator=is_a(int))

    @classmethod
    def build(cls, manual_file, filename, max_depth):
        with open(manual_file, 'r', 'utf-8') as handle:
            loaded = json.load(handle)
            assignments = loaded['assignments']

        graph = read_obo(filename)
        for so_term, isndc in assignments.items():
            graph.node[so_term]['isndc'] = isndc

        return cls(
            graph=graph,
            max_depth=max_depth
        )

    @property
    def name(self):
        return 'so-search'

    def dfs(self, term, depth):
        if term not in self.graph:
            return set()
        node = self.graph.node[term]
        if not depth and 'isndc' in node:
            return set([node['isndc']])

        if depth:
            found = set()
            edges = self.graph.out_edges_iter(term, data=True)
            for (_, child, data) in edges:
                found.update(self.dfs(child, depth - 1))
            return found
        return set()

    def search(self, root):
        for depth in range(0, self.max_depth):
            found = self.dfs(root, depth)
            if found:
                return found
        return set()

    def __call__(self, family):
        rna_types = set()
        for so_term in family.so_terms:
            rna_types.update(self.search(so_term))
        return InferredRfamType.build(family, self.name, rna_types)
Esempio n. 5
0
class ManualInference(object):
    assignments = attr.ib(validator=is_a(dict))

    @classmethod
    def build(cls, filename):
        with open(filename, 'r', 'utf-8') as handle:
            loaded = json.load(handle)
            return cls(assignments=loaded['hardcoded'])

    @property
    def name(self):
        return 'manual'

    def __call__(self, family):
        return InferredRfamType.build(
            family,
            self.name,
            self.assignments.get(family.id, None)
        )
Esempio n. 6
0
class FromSoTerms(object):
    mapping = attr.ib(validator=is_a(dict))

    @classmethod
    def build(cls, manual_file):
        with open(manual_file, 'r', 'utf-8') as handle:
            loaded = json.load(handle)
            return cls(mapping=loaded['assignments'])

    @property
    def name(self):
        return 'so-term'

    def __call__(self, family):
        mapped = set(self.mapping.get(so, None) for so in family.so_terms)
        return InferredRfamType.build(
            family,
            self.name,
            mapped
        )
Esempio n. 7
0
class FromName(object):
    informative_names = attr.ib(validator=is_a(dict))

    @classmethod
    def build(cls, filename):
        with open(filename, 'r', 'utf-8') as handle:
            loaded = json.load(handle)
            return cls(
                informative_names=loaded['informative_names'],
            )

    @property
    def name(self):
        return 'name'

    def __call__(self, family):
        for pattern, rna_type in self.informative_names.items():
            if re.search(pattern, family.name, re.IGNORECASE):
                return InferredRfamType.build(family, self.name, rna_type)
        return InferredRfamType.build(family, self.name, None)
Esempio n. 8
0
class FromRnaType(object):
    mapping = attr.ib(validator=is_a(dict))

    @classmethod
    def build(cls, filename):
        with open(filename, 'r', 'utf-8') as handle:
            loaded = json.load(handle)
            given = loaded['rna_type_mapping']
            return cls(
                mapping={rna_type_to_key(r): v for r, v in given.items()}
            )

    @property
    def name(self):
        return 'rna-type'

    def __call__(self, family):
        return InferredRfamType.build(
            family,
            self.name,
            self.mapping.get(family.rna_type, None)
        )
class RfamMatchStatus(object):
    """
    This represents implied problems from a match between an Rfam family and an
    Rna sequence. Problems are detected by various objects and this simply
    records which ones have found issues as well as some data about the issues.
    This serves as a simple way to organize many possible issues that could be
    detected.
    """

    has_issue = attr.ib(validator=is_a(bool))
    upi = attr.ib(validator=is_a(six.string_types))
    taxid = attr.ib()
    finders = attr.ib(validator=is_a(list))
    messages = attr.ib(validator=is_a(list))

    @classmethod
    def with_issue(cls, upi, taxid, finder, msg):
        """
        Create a new instance that indicates that the given finder has found an
        issue specified in the given message.
        """
        return cls(has_issue=True,
                   upi=upi,
                   taxid=taxid,
                   finders=[finder],
                   messages=[msg])

    @classmethod
    def no_issues(cls, upi, taxid):
        """
        Create a new instance that indicates there are no issues.
        """
        return cls(has_issue=False,
                   upi=upi,
                   taxid=taxid,
                   finders=[],
                   messages=[])

    @property
    def names(self):
        """
        Get the names of all finders that have found issues.
        """
        return sorted([finder.name for finder in self.finders])

    def merge(self, status):
        """
        Merge the given status with this one. This will update the issues found
        if any.
        """

        if status.upi != self.upi and self.taxid == status.taxid:
            raise ValueError("Can only merge MatchStatus from the same RNA.")

        self.finders.extend(status.finders)
        self.messages.extend(status.messages)
        self.has_issue = (self.has_issue or status.has_issue)
        return self

    def as_simple_data(self):
        """
        Create a simplified dict representation of this data. This is useful
        for storage.
        """
        return {
            'has_issue': self.has_issue,
            'problems': [{
                'name': n
            } for n in self.names],
        }

    def as_json(self):
        """
        Create a JSON representation of the simplified data.
        """
        return json.dumps(self.as_simple_data())