def serialize_node(node_, identifier=DEFAULT_IDENTIFIER):
    """Serialize a node to a GFA2 segment ("S") line.

    Accepts either a graph_element Node or a dictionary carrying the
    same information (``nid``, ``sequence``, ``slen`` and any optional
    fields). When the sequence length is ``None`` (for example, after
    parsing a GFA1 Sequence line) a length of 0 is written instead.

    :param node_: A Graph Element Node or an equivalent dictionary.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns: The tab separated line, or "" if the object cannot be
        serialized to GFA.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(node_, dict):
            # Required keys are popped from a private copy so that only
            # the optional fields are left in it; the values themselves
            # are read from the caller's untouched dictionary.
            leftovers = copy.deepcopy(node_)
            raw_length = node_['slen']
            length = 0 if raw_length is None else raw_length
            leftovers.pop('slen')
            required = [
                leftovers.pop('nid'),
                length,
                leftovers.pop('sequence'),
            ]
            columns = [
                "S",
                str(node_['nid']),
                str(length),
                str(node_['sequence']),
            ]
            columns.extend(utils._serialize_opt_fields(leftovers))
        else:
            # The node object is only read, never modified.
            raw_length = node_.slen
            length = 0 if raw_length is None else raw_length
            required = [node_.nid, node_.sequence, length]
            columns = [
                "S",
                str(node_.nid),
                str(length),
                str(node_.sequence),
            ]
            columns.extend(utils._serialize_opt_fields(node_.opt_fields))

        # The record tag is excluded from the field check.
        is_valid = utils._are_fields_defined(required) \
            and utils._check_fields(columns[1:], SEGMENT_FIELDS)
        if not is_valid:
            raise GFA2SerializationError(
                "Required node elements missing or invalid.")
        return "\t".join(columns)
    except (AttributeError, KeyError, GFA2SerializationError) as error:
        serializer_logger.debug(utils._format_exception(identifier, error))
        return ""
def _serialize_to_link(link_, identifier=DEFAULT_IDENTIFIER):
    """Serialize an edge to a GFA1 link ("L") line.

    The alignment column is written as-is when ``fv.is_gfa1_cigar``
    accepts it and as "*" otherwise. An edge id different from ``None``
    and "*" is appended as an ``ID:Z:`` tag before the optional fields.

    :param link_: An edge object or a dictionary with the same
        information.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns: The tab separated line, or "" if a required element is
        missing or invalid.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(link_, dict):
            extras = copy.deepcopy(link_)
            # NOTE(review): presumably strips the keys every edge shares
            # so only optional fields remain in the copy -- confirm in
            # utils.
            utils._remove_common_edge_fields(extras)
            required = [
                link_['from_node'],
                link_['from_orn'],
                link_['to_node'],
                link_['to_orn'],
                link_['alignment'],
            ]
            columns = [
                "L",
                str(link_['from_node']),
                str(link_['from_orn']),
                str(link_['to_node']),
                str(link_['to_orn']),
            ]
            overlap = link_['alignment']
            columns.append(
                str(overlap) if fv.is_gfa1_cigar(overlap) else "*")
            edge_id = link_['eid']
            if edge_id not in (None, '*'):
                columns.append("ID:Z:" + str(edge_id))
            columns.extend(utils._serialize_opt_fields(extras))
        else:
            required = [
                link_.from_node,
                link_.from_orn,
                link_.to_node,
                link_.to_orn,
                link_.alignment,
            ]
            columns = [
                "L",
                str(link_.from_node),
                str(link_.from_orn),
                str(link_.to_node),
                str(link_.to_orn),
            ]
            overlap = link_.alignment
            columns.append(
                str(overlap) if fv.is_gfa1_cigar(overlap) else "*")
            edge_id = link_.eid
            if edge_id not in (None, '*'):
                columns.append("ID:Z:" + str(edge_id))
            columns.extend(utils._serialize_opt_fields(link_.opt_fields))

        # The record tag is excluded from the field check.
        is_valid = utils._are_fields_defined(required) \
            and utils._check_fields(columns[1:], LINK_FIELDS)
        if not is_valid:
            raise GFA1SerializationError()
        return "\t".join(columns)
    except (KeyError, AttributeError, GFA1SerializationError) as error:
        serializer_logger.debug(utils._format_exception(identifier, error))
        return ""
def _serialize_to_fragment(fragment_, identifier=DEFAULT_IDENTIFIER):
    """Serialize an edge to a GFA2 fragment ("F") line.

    The target node and its orientation are concatenated into a single
    column; the two ``from_positions`` and the two ``to_positions``
    entries each get their own column, followed by the alignment and
    the optional fields.

    :param fragment_: An edge object or a dictionary with the same
        information.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns: The tab separated line, or "" if a required element is
        missing or invalid.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(fragment_, dict):
            extras = copy.deepcopy(fragment_)
            # NOTE(review): presumably strips the keys every edge shares
            # so only optional fields remain in the copy -- confirm in
            # utils.
            utils._remove_common_edge_fields(extras)
            required = [
                fragment_['from_node'],
                fragment_['to_node'],
                fragment_['to_orn'],
                fragment_['from_positions'][0],
                fragment_['from_positions'][1],
                fragment_['to_positions'][0],
                fragment_['to_positions'][1],
                fragment_['alignment'],
            ]
            columns = [
                "F",
                str(fragment_['from_node']),
                str(fragment_['to_node']) + str(fragment_['to_orn']),
                str(fragment_['from_positions'][0]),
                str(fragment_['from_positions'][1]),
                str(fragment_['to_positions'][0]),
                str(fragment_['to_positions'][1]),
                str(fragment_['alignment']),
            ]
            columns.extend(utils._serialize_opt_fields(extras))
        else:
            required = [
                fragment_.from_node,
                fragment_.to_node,
                fragment_.to_orn,
                fragment_.from_positions[0],
                fragment_.from_positions[1],
                fragment_.to_positions[0],
                fragment_.to_positions[1],
                fragment_.alignment,
            ]
            columns = [
                "F",
                str(fragment_.from_node),
                str(fragment_.to_node) + str(fragment_.to_orn),
                str(fragment_.from_positions[0]),
                str(fragment_.from_positions[1]),
                str(fragment_.to_positions[0]),
                str(fragment_.to_positions[1]),
                str(fragment_.alignment),
            ]
            columns.extend(
                utils._serialize_opt_fields(fragment_.opt_fields))

        # The record tag is excluded from the field check.
        is_valid = utils._are_fields_defined(required) \
            and utils._check_fields(columns[1:], FRAGMENT_FIELDS)
        if not is_valid:
            raise GFA2SerializationError(
                "Required Fragment elements missing or invalid.")
        return "\t".join(columns)
    except (KeyError, AttributeError, GFA2SerializationError) as error:
        serializer_logger.debug(utils._format_exception(identifier, error))
        return ""
def serialize_subgraph(subgraph_, identifier=DEFAULT_IDENTIFIER, gfa_=None):
    """Serialize a Subgraph object or an equivalent dictionary to a
    GFA2 group line.

    The record is tagged "O" when ``are_elements_oriented`` reports the
    elements as oriented and "U" otherwise; the matching field
    definitions (OGROUP_FIELDS or UGROUP_FIELDS) are used for the
    validation. An ``overlaps`` entry is never written to the line, for
    dictionaries and objects alike.

    :param subgraph_: A Subgraph object or a dictionary with the keys
        ``sub_id`` and ``elements`` plus any optional fields.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :param gfa_: Forwarded unchanged to
        ``_serialize_subgraph_elements``.
    :returns: The tab separated line, or "" if the subgraph cannot be
        serialized.

    :TODO:
        Check with `gfa` for OGroup in UGroup.
        See GFA2 spec.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(subgraph_, dict):
            # Required keys are popped from a private copy so that only
            # the optional fields are left in it.
            optional = copy.deepcopy(subgraph_)
            defined_fields = [
                optional.pop('sub_id'),
                optional.pop('elements'),
            ]
            sub_id = subgraph_['sub_id']
            elements = subgraph_['elements']
        else:
            optional = copy.deepcopy(subgraph_.opt_fields)
            defined_fields = [
                subgraph_.sub_id,
                subgraph_.elements,
            ]
            sub_id = subgraph_.sub_id
            elements = subgraph_.elements

        fields = ["O"] if are_elements_oriented(elements) else ["U"]
        fields.append(str(sub_id))
        fields.append(_serialize_subgraph_elements(elements, gfa_))

        # 'overlaps' is dropped from the copy, and it is the copy that
        # gets serialized. The object branch used to serialize the
        # original opt_fields, which leaked 'overlaps' into the line.
        optional.pop('overlaps', None)
        fields.extend(utils._serialize_opt_fields(optional))

        group_fields = OGROUP_FIELDS if fields[0] == "O" else UGROUP_FIELDS
        if not utils._are_fields_defined(defined_fields) or \
           not utils._check_fields(fields[1:], group_fields):
            raise GFA2SerializationError("Required Subgraph elements "
                                         + "missing or invalid.")
        return "\t".join(fields)
    except (KeyError, ValueError, AttributeError,
            GFA2SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
def serialize_subgraph(subgraph_, identifier=DEFAULT_IDENTIFIER, gfa_=None):
    """Serialize a Subgraph object or an equivalent dictionary to a
    GFA1 path ("P") line.

    The line holds the subgraph id, the serialized elements and the
    overlaps column: the comma-joined ``value`` of the ``overlaps``
    entry when one is present, "*" otherwise. Remaining optional fields
    follow.

    :param subgraph_: A Subgraph object or a dictionary with the keys
        ``sub_id`` and ``elements`` plus any optional fields.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :param gfa_: Forwarded unchanged to
        ``_serialize_subgraph_elements``.
    :returns: The tab separated line, or "" if the subgraph cannot be
        serialized.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(subgraph_, dict):
            # Required keys are popped from a private copy so that only
            # the optional fields are left in it.
            subgraph_dict = copy.deepcopy(subgraph_)
            defined_fields = [
                subgraph_dict.pop('sub_id'),
                subgraph_dict.pop('elements'),
            ]
            fields = ["P"]
            # str() keeps a non-string id from reaching the field check
            # and the final join as a non-string, consistently with the
            # other serializers.
            fields.append(str(subgraph_['sub_id']))
            fields.append(
                _serialize_subgraph_elements(subgraph_['elements'], gfa_))
            if 'overlaps' in subgraph_:
                # Overlaps get their own column, so they must not be
                # emitted again among the optional fields.
                subgraph_dict.pop('overlaps')
                fields.append(str.join(",", subgraph_['overlaps'].value))
            else:
                fields.append("*")
            fields.extend(utils._serialize_opt_fields(subgraph_dict))
        else:
            defined_fields = [
                subgraph_.sub_id,
                subgraph_.elements,
            ]
            opt_fields = copy.deepcopy(subgraph_.opt_fields)
            fields = ["P"]
            fields.append(str(subgraph_.sub_id))
            fields.append(
                _serialize_subgraph_elements(subgraph_.elements, gfa_))
            if 'overlaps' in subgraph_.opt_fields:
                opt_fields.pop('overlaps')
                fields.append(
                    str.join(",", subgraph_.opt_fields['overlaps'].value))
            else:
                fields.append("*")
            fields.extend(utils._serialize_opt_fields(opt_fields))

        # The record tag is excluded from the field check.
        if not utils._are_fields_defined(defined_fields) or \
           not utils._check_fields(fields[1:], PATH_FIELDS):
            raise GFA1SerializationError("Required fields missing or"
                                         + " not valid.")
        return "\t".join(fields)
    except (KeyError, AttributeError, GFA1SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
def _serialize_to_gap(gap_, identifier=DEFAULT_IDENTIFIER):
    """Serialize an edge to a GFA2 gap ("G") line.

    Each node is written together with its orientation in a single
    column, followed by the distance, the variance and the optional
    fields. Dictionaries and objects go through the same validation
    before the line is returned.

    :param gap_: An edge object or a dictionary with the same
        information.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns: The tab separated line, or "" if a required element is
        missing or invalid.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(gap_, dict):
            gap_dict = copy.deepcopy(gap_)
            # NOTE(review): presumably strips the keys every edge shares
            # so only optional fields remain in the copy -- confirm in
            # utils.
            utils._remove_common_edge_fields(gap_dict)
            defined_fields = [
                gap_['eid'],
                gap_['from_node'],
                gap_['from_orn'],
                gap_['to_node'],
                gap_['to_orn'],
                gap_['distance'],
                gap_['variance'],
            ]
            fields = ["G"]
            fields.append(str(gap_['eid']))
            fields.append(str(gap_['from_node']) + str(gap_['from_orn']))
            fields.append(str(gap_['to_node']) + str(gap_['to_orn']))
            fields.append(str(gap_['distance']))
            fields.append(str(gap_['variance']))
            fields.extend(utils._serialize_opt_fields(gap_dict))
            # No early return here: the dictionary form must be
            # validated below exactly like the object form.
        else:
            defined_fields = [
                gap_.eid,
                gap_.from_node,
                gap_.from_orn,
                gap_.to_node,
                gap_.to_orn,
                gap_.distance,
                gap_.variance,
            ]
            fields = ["G"]
            fields.append(str(gap_.eid))
            fields.append(str(gap_.from_node) + str(gap_.from_orn))
            fields.append(str(gap_.to_node) + str(gap_.to_orn))
            fields.append(str(gap_.distance))
            fields.append(str(gap_.variance))
            fields.extend(utils._serialize_opt_fields(gap_.opt_fields))

        # The record tag is excluded from the field check.
        if not utils._are_fields_defined(defined_fields) or \
           not utils._check_fields(fields[1:], GAP_FIELDS):
            raise GFA2SerializationError("Required Gap elements "
                                         + "missing or invalid.")
        return "\t".join(fields)
    except (AttributeError, KeyError, GFA2SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
def serialize_node(node_, identifier=DEFAULT_IDENTIFIER):
    """Serialize to the GFA1 specification a Graph Element Node or a
    dictionary that has the same information.

    The line holds the node id and the sequence; when the sequence
    length is not ``None`` it is appended as an ``LN:i:`` tag, followed
    by the optional fields.

    :param node_: A Graph Element Node or a dictionary.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns: The tab separated line, or "" if the object cannot be
        serialized to GFA.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(node_, dict):
            # Required keys are popped from a private copy so that only
            # the optional fields are left in it; the values themselves
            # are read from the caller's untouched dictionary.
            node_dict = copy.deepcopy(node_)
            defined_fields = [
                node_dict.pop('nid'),
                node_dict.pop('sequence'),
            ]
            node_dict.pop('slen')
            fields = ["S"]
            fields.append(str(node_['nid']))
            fields.append(str(node_['sequence']))
            # Identity test: None is the only "length unknown" marker.
            if node_['slen'] is not None:
                fields.append("LN:i:" + str(node_['slen']))
            fields.extend(utils._serialize_opt_fields(node_dict))
        else:
            defined_fields = [
                node_.nid,
                node_.sequence,
            ]
            fields = ["S"]
            fields.append(str(node_.nid))
            fields.append(str(node_.sequence))
            if node_.slen is not None:
                fields.append("LN:i:" + str(node_.slen))
            fields.extend(utils._serialize_opt_fields(node_.opt_fields))

        # The record tag is excluded from the field check.
        if not utils._are_fields_defined(defined_fields) or \
           not utils._check_fields(fields[1:], SEGMENT_FIELDS):
            raise GFA1SerializationError("Required node elements "
                                         + "missing or invalid.")
        return "\t".join(fields)
    except (KeyError, AttributeError, GFA1SerializationError) as e:
        serializer_logger.debug(utils._format_exception(identifier, e))
        return ""
def _serialize_to_containment(containment_, identifier=DEFAULT_IDENTIFIER):
    """Serialize an edge to a GFA1 containment ("C") line.

    The position column is taken from the ``value`` attribute of the
    ``pos`` entry (a dictionary key, or an optional field on objects)
    and is therefore left out of the trailing optional fields. The
    alignment column is written as-is when ``fv.is_gfa1_cigar`` accepts
    it and as "*" otherwise. An edge id different from ``None`` and "*"
    is appended as an ``ID:Z:`` tag.

    :param containment_: An edge object or a dictionary with the same
        information.
    :param identifier: Label used in the debug log entry emitted when
        serialization fails.
    :returns: The tab separated line, or "" if a required element is
        missing or invalid.
    """
    identifier = utils._check_identifier(identifier)
    try:
        if isinstance(containment_, dict):
            extras = copy.deepcopy(containment_)
            # NOTE(review): presumably strips the keys every edge shares
            # so only optional fields remain in the copy -- confirm in
            # utils.
            utils._remove_common_edge_fields(extras)
            extras.pop('pos')
            required = [
                containment_['from_node'],
                containment_['from_orn'],
                containment_['to_node'],
                containment_['to_orn'],
                containment_['alignment'],
                containment_['pos'].value,
            ]
            columns = [
                "C",
                str(containment_['from_node']),
                str(containment_['from_orn']),
                str(containment_['to_node']),
                str(containment_['to_orn']),
                str(containment_['pos'].value),
            ]
            overlap = containment_['alignment']
            columns.append(
                str(overlap) if fv.is_gfa1_cigar(overlap) else "*")
            edge_id = containment_['eid']
            if edge_id not in (None, '*'):
                columns.append("ID:Z:" + str(edge_id))
            columns.extend(utils._serialize_opt_fields(extras))
        else:
            required = [
                containment_.from_node,
                containment_.from_orn,
                containment_.to_node,
                containment_.to_orn,
                containment_.alignment,
                containment_.opt_fields['pos'].value,
            ]
            # 'pos' is removed from a copy only; the edge's own optional
            # fields stay untouched.
            extras = copy.deepcopy(containment_.opt_fields)
            extras.pop('pos')
            columns = [
                "C",
                str(containment_.from_node),
                str(containment_.from_orn),
                str(containment_.to_node),
                str(containment_.to_orn),
                str(containment_.opt_fields['pos'].value),
            ]
            overlap = containment_.alignment
            columns.append(
                str(overlap) if fv.is_gfa1_cigar(overlap) else "*")
            edge_id = containment_.eid
            if edge_id not in (None, '*'):
                columns.append("ID:Z:" + str(edge_id))
            columns.extend(utils._serialize_opt_fields(extras))

        # The record tag is excluded from the field check.
        is_valid = utils._are_fields_defined(required) \
            and utils._check_fields(columns[1:], CONTAINMENT_FIELDS)
        if not is_valid:
            raise GFA1SerializationError()
        return "\t".join(columns)
    except (KeyError, AttributeError, GFA1SerializationError) as error:
        serializer_logger.debug(utils._format_exception(identifier, error))
        return ""