Beispiel #1
0
def serialize_node(node_, identifier=DEFAULT_IDENTIFIER):
    """Build the GFA2 segment (``S``) line for a node.

    The node may be given either as a Graph Element Node object or as
    a dictionary carrying the same information (``nid``, ``slen``,
    ``sequence`` plus any optional fields).

    When the sequence length is undefined (for example, after parsing
    a GFA1 Sequence line) a length of 0 is written instead.

    :param node_: A Graph Element Node or an equivalent dictionary.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns "": If the object cannot be serialized to GFA.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(node_, dict):
            # The deep copy is consumed key by key: once the required
            # keys are popped, whatever remains is the set of optional
            # fields. The caller's dictionary is only ever read.
            leftover = copy.deepcopy(node_)
            length = node_['slen']
            if length is None:
                length = 0
            leftover.pop('slen')
            required = [leftover.pop('nid'), length, leftover.pop('sequence')]
            columns = [
                "S",
                str(node_['nid']),
                str(length),
                str(node_['sequence']),
            ]
            columns.extend(utils._serialize_opt_fields(leftover))
        else:
            # The node object is only read, never modified.
            length = node_.slen
            if length is None:
                length = 0
            required = [node_.nid, node_.sequence, length]
            columns = [
                "S",
                str(node_.nid),
                str(length),
                str(node_.sequence),
            ]
            columns.extend(utils._serialize_opt_fields(node_.opt_fields))

        # The leading record type is not part of the checked columns.
        valid = utils._are_fields_defined(required) \
            and utils._check_fields(columns[1:], SEGMENT_FIELDS)
        if not valid:
            raise GFA2SerializationError(
                "Required node elements missing or invalid.")

        return "\t".join(columns)
    except (AttributeError, KeyError, GFA2SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #2
0
def _serialize_to_link(link_, identifier=DEFAULT_IDENTIFIER):
    """Build the GFA1 link (``L``) line for an edge.

    The edge may be an object exposing ``from_node``, ``from_orn``,
    ``to_node``, ``to_orn``, ``alignment``, ``eid`` and ``opt_fields``,
    or a dictionary with the same keys. An alignment that is not a
    GFA1 CIGAR is written as ``*``; an ``eid`` other than None or ``*``
    is emitted as an ``ID:Z:`` field.

    :param link_: The edge object or an equivalent dictionary.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns "": If the link cannot be serialized.
    """
    required_names = (
        'from_node', 'from_orn', 'to_node', 'to_orn', 'alignment')
    identifier = utils._check_identifier(identifier)
    try:
        is_mapping = isinstance(link_, dict)
        if is_mapping:
            # Optional fields are taken from a copy so the caller's
            # dictionary is left untouched.
            extras = copy.deepcopy(link_)
            utils._remove_common_edge_fields(extras)
            required = [link_[name] for name in required_names]
        else:
            required = [getattr(link_, name) for name in required_names]

        columns = ["L"] + [str(value) for value in required[:4]]

        alignment = required[4]
        columns.append(
            str(alignment) if fv.is_gfa1_cigar(alignment) else "*")

        edge_id = link_['eid'] if is_mapping else link_.eid
        if edge_id not in (None, '*'):
            columns.append("ID:Z:" + str(edge_id))

        columns.extend(utils._serialize_opt_fields(
            extras if is_mapping else link_.opt_fields))

        # The leading record type is not part of the checked columns.
        valid = utils._are_fields_defined(required) \
            and utils._check_fields(columns[1:], LINK_FIELDS)
        if not valid:
            raise GFA1SerializationError()

        return "\t".join(columns)

    except (KeyError, AttributeError, GFA1SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #3
0
def _serialize_to_fragment(fragment_, identifier=DEFAULT_IDENTIFIER):
    """Build the GFA2 fragment (``F``) line for an edge.

    The edge may be an object exposing ``from_node``, ``to_node``,
    ``to_orn``, ``from_positions``, ``to_positions``, ``alignment``
    and ``opt_fields``, or a dictionary with the same keys. The first
    two items of each positions sequence are written as the begin and
    end columns.

    :param fragment_: The edge object or an equivalent dictionary.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns "": If the fragment cannot be serialized.
    """
    identifier = utils._check_identifier(identifier)
    try:
        is_mapping = isinstance(fragment_, dict)
        if is_mapping:
            # Optional fields are taken from a copy so the caller's
            # dictionary is left untouched.
            extras = copy.deepcopy(fragment_)
            utils._remove_common_edge_fields(extras)
            required = [
                fragment_['from_node'],
                fragment_['to_node'],
                fragment_['to_orn'],
                fragment_['from_positions'][0],
                fragment_['from_positions'][1],
                fragment_['to_positions'][0],
                fragment_['to_positions'][1],
                fragment_['alignment'],
            ]
        else:
            required = [
                fragment_.from_node,
                fragment_.to_node,
                fragment_.to_orn,
                fragment_.from_positions[0],
                fragment_.from_positions[1],
                fragment_.to_positions[0],
                fragment_.to_positions[1],
                fragment_.alignment,
            ]

        (source, target, target_orn,
         source_begin, source_end,
         target_begin, target_end,
         alignment) = required

        columns = [
            "F",
            str(source),
            # The target reference carries its orientation as a suffix.
            str(target) + str(target_orn),
            str(source_begin),
            str(source_end),
            str(target_begin),
            str(target_end),
            str(alignment),
        ]
        columns.extend(utils._serialize_opt_fields(
            extras if is_mapping else fragment_.opt_fields))

        # The leading record type is not part of the checked columns.
        valid = utils._are_fields_defined(required) \
            and utils._check_fields(columns[1:], FRAGMENT_FIELDS)
        if not valid:
            raise GFA2SerializationError(
                "Required Fragment elements missing or invalid.")

        return "\t".join(columns)

    except (KeyError, AttributeError, GFA2SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #4
0
def serialize_subgraph(subgraph_, identifier=DEFAULT_IDENTIFIER, gfa_=None):
    """Serialize a Subgraph object or an equivalent dictionary to a
    GFA2 group line.

    The record type is ``O`` when `are_elements_oriented` reports the
    elements as oriented and ``U`` otherwise. An ``overlaps`` entry is
    never written: it is dropped from the optional fields before they
    are serialized, for dictionaries and Subgraph objects alike.

    :param subgraph_: A Subgraph object (``sub_id``, ``elements``,
        ``opt_fields``) or a dictionary with the ``sub_id`` and
        ``elements`` keys plus any optional fields.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :param gfa_: Forwarded unchanged to `_serialize_subgraph_elements`.
    :returns "": If subgraph cannot be serialized.

    :TODO:
        Check with `gfa` for OGroup in UGroup.
        See GFA2 spec.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(subgraph_, dict):
            # Work on a copy: once the required keys are popped, what
            # remains is the set of optional fields.
            opt_fields = copy.deepcopy(subgraph_)
            defined_fields = [
                opt_fields.pop('sub_id'),
                opt_fields.pop('elements'),
            ]
            sub_id = subgraph_['sub_id']
            elements = subgraph_['elements']
        else:
            # Copy so that dropping 'overlaps' below never alters the
            # Subgraph object itself.
            opt_fields = copy.deepcopy(subgraph_.opt_fields)
            sub_id = subgraph_.sub_id
            elements = subgraph_.elements
            defined_fields = [sub_id, elements]

        fields = ["O" if are_elements_oriented(elements) else "U"]
        fields.append(str(sub_id))
        fields.append(_serialize_subgraph_elements(elements, gfa_))

        # Serialize the filtered copy, not the original optional fields,
        # so that 'overlaps' is really left out of the line.
        opt_fields.pop('overlaps', None)
        fields.extend(utils._serialize_opt_fields(opt_fields))

        group_fields = OGROUP_FIELDS if fields[0] == "O" else \
                       UGROUP_FIELDS
        # The leading record type is not part of the checked columns.
        if not utils._are_fields_defined(defined_fields) or \
           not utils._check_fields(fields[1:], group_fields):
            raise GFA2SerializationError("Required Subgraph elements " \
                                        + "missing or invalid.")

        return "\t".join(fields)
    except (KeyError, ValueError, AttributeError, GFA2SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #5
0
def serialize_subgraph(subgraph_, identifier=DEFAULT_IDENTIFIER, gfa_=None):
    """Serialize a Subgraph object or an equivalent dictionary to a
    GFA1 path (``P``) line.

    When an ``overlaps`` entry is present, its ``value`` joined with
    commas fills the overlaps column and the entry is excluded from
    the optional fields; otherwise the column is written as ``*``.

    :param subgraph_: A Subgraph object (``sub_id``, ``elements``,
        ``opt_fields``) or a dictionary with the ``sub_id`` and
        ``elements`` keys plus any optional fields.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :param gfa_: Forwarded unchanged to `_serialize_subgraph_elements`.
    :returns "": If the subgraph cannot be serialized.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(subgraph_, dict):
            # Work on a copy: once the required keys are popped, what
            # remains is the set of optional fields.
            subgraph_dict = copy.deepcopy(subgraph_)
            defined_fields = [
                subgraph_dict.pop('sub_id'),
                subgraph_dict.pop('elements'),
            ]
            fields = ["P"]
            # Convert explicitly, as the other serializers do, so a
            # non-string id cannot break the final join.
            fields.append(str(subgraph_['sub_id']))
            fields.append(
                _serialize_subgraph_elements(subgraph_['elements'], gfa_))

            if 'overlaps' in subgraph_:
                subgraph_dict.pop('overlaps')
                fields.append(",".join(subgraph_['overlaps'].value))
            else:
                fields.append("*")
            fields.extend(utils._serialize_opt_fields(subgraph_dict))
        else:
            defined_fields = [
                subgraph_.sub_id,
                subgraph_.elements,
            ]
            # Copy so that dropping 'overlaps' never alters the
            # Subgraph object itself.
            opt_fields = copy.deepcopy(subgraph_.opt_fields)

            fields = ["P"]
            fields.append(str(subgraph_.sub_id))
            fields.append(
                _serialize_subgraph_elements(subgraph_.elements, gfa_))
            if 'overlaps' in subgraph_.opt_fields:
                opt_fields.pop('overlaps')
                fields.append(
                    ",".join(subgraph_.opt_fields['overlaps'].value))
            else:
                fields.append("*")
            fields.extend(utils._serialize_opt_fields(opt_fields))

        # The leading record type is not part of the checked columns.
        if not utils._are_fields_defined(defined_fields) or \
           not utils._check_fields(fields[1:], PATH_FIELDS):
            raise GFA1SerializationError("Required fields missing or" \
                                        + " not valid.")
        return "\t".join(fields)

    except (KeyError, AttributeError, GFA1SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #6
0
def _serialize_to_gap(gap_, identifier=DEFAULT_IDENTIFIER):
    """Build the GFA2 gap (``G``) line for an edge.

    The edge may be an object exposing ``eid``, ``from_node``,
    ``from_orn``, ``to_node``, ``to_orn``, ``distance``, ``variance``
    and ``opt_fields``, or a dictionary with the same keys. Both
    forms go through the same validation before the line is returned.

    :param gap_: The edge object or an equivalent dictionary.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns "": If the gap cannot be serialized.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(gap_, dict):
            # Optional fields are taken from a copy so the caller's
            # dictionary is left untouched.
            opt_fields = copy.deepcopy(gap_)
            utils._remove_common_edge_fields(opt_fields)
            defined_fields = [
                gap_['eid'],
                gap_['from_node'],
                gap_['from_orn'],
                gap_['to_node'],
                gap_['to_orn'],
                gap_['distance'],
                gap_['variance'],
            ]
        else:
            defined_fields = [
                gap_.eid,
                gap_.from_node,
                gap_.from_orn,
                gap_.to_node,
                gap_.to_orn,
                gap_.distance,
                gap_.variance,
            ]
            opt_fields = gap_.opt_fields

        (eid, from_node, from_orn,
         to_node, to_orn, distance, variance) = defined_fields

        fields = [
            "G",
            str(eid),
            # Each segment reference carries its orientation as a suffix.
            str(from_node) + str(from_orn),
            str(to_node) + str(to_orn),
            str(distance),
            str(variance),
        ]
        fields.extend(utils._serialize_opt_fields(opt_fields))

        # Validation applies to dictionaries too: there is no early
        # return that would let an undefined value be written out.
        # The leading record type is not part of the checked columns.
        if not utils._are_fields_defined(defined_fields) or \
           not utils._check_fields(fields[1:], GAP_FIELDS):
            raise GFA2SerializationError("Required Gap elements " \
                                        + "missing or invalid.")
        return "\t".join(fields)
    except (AttributeError, KeyError, GFA2SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #7
0
def serialize_node(node_, identifier=DEFAULT_IDENTIFIER):
    """Serialize to the GFA1 specification a Graph Element Node or a
    dictionary that has the same informations.

    The line is made of the ``S`` record type, the node id and the
    sequence. When the sequence length is not None it is appended as
    an ``LN:i:`` field, followed by the remaining optional fields.

    :param node_: A Graph Element Node or a dictionary.
    :param identifier: If set help gaining useful debug information.
    :return "": If the object cannot be serialized to GFA.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(node_, dict):
            # Work on a copy: once the required keys and 'slen' are
            # popped, what remains is the set of optional fields.
            node_dict = copy.deepcopy(node_)
            defined_fields = [
                node_dict.pop('nid'),
                node_dict.pop('sequence'),
            ]
            node_dict.pop('slen')
            fields = ["S"]
            fields.append(str(node_['nid']))
            fields.append(str(node_['sequence']))
            if node_['slen'] is not None:
                fields.append("LN:i:" + str(node_['slen']))

            fields.extend(utils._serialize_opt_fields(node_dict))
        else:
            defined_fields = [
                node_.nid,
                node_.sequence,
            ]
            fields = ["S"]
            fields.append(str(node_.nid))
            fields.append(str(node_.sequence))
            if node_.slen is not None:
                fields.append("LN:i:" + str(node_.slen))
            fields.extend(utils._serialize_opt_fields(node_.opt_fields))

        # The leading record type is not part of the checked columns.
        if not utils._are_fields_defined(defined_fields) or \
           not utils._check_fields(fields[1:], SEGMENT_FIELDS):
            raise GFA1SerializationError("Required node elements " \
                                        + "missing or invalid.")

        return "\t".join(fields)
    except (KeyError, AttributeError, GFA1SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #8
0
def serialize_edge(edge_, identifier=DEFAULT_IDENTIFIER):
    """Converts to a GFA1 line the given edge.

    Fragments and Gaps cannot be represented in GFA1 specification,
    so they are not serialized.

    The kind of edge is decided in this order: a None ``eid`` marks a
    fragment; a ``distance`` or ``variance`` that is not None marks a
    gap; a ``pos`` entry (a dictionary key, or a key of ``opt_fields``
    for objects) marks a containment; ``is_dovetail`` being True marks
    a link. Any other edge is rejected.

    :param edge_: An edge object or an equivalent dictionary.
    :param identifier: Label used in the debug log entries; it is
        passed on to the containment and link serializers.
    :returns "": If the edge cannot be serialized to GFA1.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(edge_, dict):
            if edge_['eid'] is None:  # edge_ is a fragment
                raise GFA1SerializationError("Cannot serialize Fragment " \
                                        + "to GFA1.")
            elif edge_['distance'] is not None or \
              edge_['variance'] is not None:  # edge_ is a gap
                raise GFA1SerializationError("Cannot serialize GAP " \
                                        + "to GFA1.")
            elif 'pos' in edge_:  # edge_ is a containment
                return _serialize_to_containment(edge_, identifier)
            elif edge_['is_dovetail'] is True:
                return _serialize_to_link(edge_, identifier)
            else:
                raise GFA1SerializationError("Cannot convert an " \
                                            + "internal edge to a Link")
        else:
            if edge_.eid is None:  # edge_ is a fragment
                raise GFA1SerializationError("Cannot serialize Fragment " \
                                        + "to GFA1.")
            elif edge_.distance is not None or \
              edge_.variance is not None:  # edge_ is a gap
                raise GFA1SerializationError("Cannot serialize GAP " \
                                        + "to GFA1.")
            elif 'pos' in edge_.opt_fields:  # edge_ is a containment
                # Pass the identifier on, as the dictionary branch does,
                # so failures are logged under the caller's label.
                return _serialize_to_containment(edge_, identifier)
            elif edge_.is_dovetail is True:
                return _serialize_to_link(edge_, identifier)
            else:
                raise GFA1SerializationError("Cannot convert an " \
                                            + "internal edge to a Link")
    except (KeyError, AttributeError, GFA1SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #9
0
def serialize_edge(edge_, identifier=DEFAULT_IDENTIFIER):
    """Converts to a GFA2 line the given edge.

    The edge is dispatched in this order: a None ``eid`` marks a
    fragment; a ``distance`` or ``variance`` that is not None marks a
    gap; anything else is handed to `_serialize_to_edge`.

    :param edge_: An edge object or an equivalent dictionary.
    :param identifier: Label used in the debug log entries; it is
        passed on to whichever serializer handles the edge.
    :returns "": If a required key or attribute is missing; otherwise
        whatever the selected serializer returns.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(edge_, dict):
            if edge_['eid'] is None:  # edge_ is a fragment
                return _serialize_to_fragment(edge_, identifier)
            if edge_['distance'] is not None or \
              edge_['variance'] is not None:  # edge_ is a gap
                return _serialize_to_gap(edge_, identifier)
            return _serialize_to_edge(edge_, identifier)
        else:
            if edge_.eid is None:  # edge_ is a fragment
                return _serialize_to_fragment(edge_, identifier)
            if edge_.distance is not None or \
              edge_.variance is not None:  # edge_ is a gap
                return _serialize_to_gap(edge_, identifier)
            # Pass the identifier on, as every other call here does, so
            # failures are logged under the caller's label.
            return _serialize_to_edge(edge_, identifier)

    except (KeyError, AttributeError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
Beispiel #10
0
def _serialize_to_containment(containment_, identifier=DEFAULT_IDENTIFIER):
    """Build the GFA1 containment (``C``) line for an edge.

    The edge may be an object exposing ``from_node``, ``from_orn``,
    ``to_node``, ``to_orn``, ``alignment``, ``eid`` and ``opt_fields``,
    or a dictionary with the same keys. The position column is the
    ``value`` of the ``pos`` entry (a dictionary key, or a key of
    ``opt_fields`` for objects), which is then left out of the
    optional fields. An alignment that is not a GFA1 CIGAR is written
    as ``*``; an ``eid`` other than None or ``*`` is emitted as an
    ``ID:Z:`` field.

    :param containment_: The edge object or an equivalent dictionary.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns "": If the containment cannot be serialized.
    """
    identifier = utils._check_identifier(identifier)
    try:
        is_mapping = isinstance(containment_, dict)
        if is_mapping:
            # Optional fields are taken from a copy so the caller's
            # dictionary is left untouched.
            extras = copy.deepcopy(containment_)
            utils._remove_common_edge_fields(extras)
            extras.pop('pos')
            required = [
                containment_['from_node'],
                containment_['from_orn'],
                containment_['to_node'],
                containment_['to_orn'],
                containment_['alignment'],
                containment_['pos'].value,
            ]
        else:
            required = [
                containment_.from_node,
                containment_.from_orn,
                containment_.to_node,
                containment_.to_orn,
                containment_.alignment,
                containment_.opt_fields['pos'].value,
            ]
            # Copy before dropping 'pos' so the edge object keeps it.
            extras = copy.deepcopy(containment_.opt_fields)
            extras.pop('pos')

        from_node, from_orn, to_node, to_orn, alignment, position = required

        columns = [
            "C",
            str(from_node),
            str(from_orn),
            str(to_node),
            str(to_orn),
            str(position),
        ]
        columns.append(
            str(alignment) if fv.is_gfa1_cigar(alignment) else "*")

        edge_id = containment_['eid'] if is_mapping else containment_.eid
        if edge_id not in (None, '*'):
            columns.append("ID:Z:" + str(edge_id))

        columns.extend(utils._serialize_opt_fields(extras))

        # The leading record type is not part of the checked columns.
        valid = utils._are_fields_defined(required) \
            and utils._check_fields(columns[1:], CONTAINMENT_FIELDS)
        if not valid:
            raise GFA1SerializationError()

        return "\t".join(columns)

    except (KeyError, AttributeError, GFA1SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""