def _write_match_tmp(signatures: dict, u2variants: dict, p_proteins: str,
                     p_uniprot2matches: str, start: str, stop: Optional[str],
                     output: str):
    """Serialise ``<protein>`` elements and their signature matches to a file.

    For every ``(accession, protein)`` pair yielded by
    ``Store(p_proteins).range(start, stop)``, one ``<protein>`` element is
    written to `output`, followed by one ``<protein>`` element per variant
    listed for that accession in `u2variants`. Only the elements themselves
    are written; no enclosing root element is produced here.

    :param signatures: maps a signature accession to the object handed to
        ``_create_match``. Accessions absent from this mapping are skipped.
    :param u2variants: maps a UniProt accession to an iterable of
        ``(variant, length, crc64, matches)`` tuples, where ``matches`` is
        keyed by signature accession.
    :param p_proteins: argument passed to ``Store`` to open the proteins
        store (presumably a file path -- confirm against ``Store``).
    :param p_uniprot2matches: argument passed to ``Store`` to open the
        accession-to-matches store.
    :param start: first bound passed to ``range()`` on the proteins store.
    :param stop: second bound passed to ``range()``; may be ``None``.
    :param output: path of the UTF-8 text file to create.
    """
    protein_store = Store(p_proteins)
    match_store = Store(p_uniprot2matches)

    with open(output, "wt", encoding="utf-8") as fh:
        doc = getDOMImplementation().createDocument(None, None, None)

        def build_protein(acc, name, length, crc64):
            # Create a <protein> element carrying the four attributes.
            node = doc.createElement("protein")
            node.setAttribute("id", acc)
            node.setAttribute("name", name)
            node.setAttribute("length", str(length))
            node.setAttribute("crc64", crc64)
            return node

        def attach_matches(node, hits):
            # Append one child per accession, in sorted accession order.
            for acc in sorted(hits):
                try:
                    signature = signatures[acc]
                except KeyError:
                    # Not a known signature (InterPro entry): ignore it
                    continue

                node.appendChild(_create_match(doc, signature, hits[acc]))

        for uniprot_acc, info in protein_store.range(start, stop):
            protein_node = build_protein(uniprot_acc, info["identifier"],
                                         info["length"], info["crc64"])

            # The element is written even when the protein has no matches,
            # and also when an error interrupts the population of matches.
            try:
                try:
                    hits = match_store[uniprot_acc]
                except KeyError:
                    hits = {}

                attach_matches(protein_node, hits)
            finally:
                protein_node.writexml(fh, addindent=" ", newl="\n")

            # Variants use their own accession as both ID and name
            for variant, length, crc64, hits in u2variants.get(uniprot_acc, []):
                variant_node = build_protein(variant, variant, length, crc64)
                attach_matches(variant_node, hits)
                variant_node.writexml(fh, addindent=" ", newl="\n")
def _write_feature_tmp(features: dict, p_proteins: str,
                       p_uniprot2features: str, start: str,
                       stop: Optional[str], output: str):
    """Serialise ``<protein>`` elements and their feature matches to a file.

    For every ``(accession, protein_features)`` pair yielded by
    ``Store(p_uniprot2features).range(start, stop)``, one ``<protein>``
    element is written to `output`. It holds one ``<match>`` child per
    feature accession (in sorted order), and each ``<match>`` holds one
    ``<lcn>`` child per location (in sorted order). Only the elements
    themselves are written; no enclosing root element is produced here.

    :param features: maps a feature accession to a mapping providing the
        ``"name"``, ``"database"`` and ``"evidence"`` keys.
    :param p_proteins: argument passed to ``Store`` to open the proteins
        store (presumably a file path -- confirm against ``Store``).
    :param p_uniprot2features: argument passed to ``Store`` to open the
        accession-to-features store.
    :param start: first bound passed to ``range()`` on the features store.
    :param stop: second bound passed to ``range()``; may be ``None``.
    :param output: path of the UTF-8 text file to create.
    """
    protein_store = Store(p_proteins)
    feature_store = Store(p_uniprot2features)

    with open(output, "wt", encoding="utf-8") as fh:
        doc = getDOMImplementation().createDocument(None, None, None)

        # Iteration is driven by the features store (not the proteins
        # store): protein details are looked up for each accession it yields.
        for uniprot_acc, hits in feature_store.range(start, stop):
            info = protein_store[uniprot_acc]

            protein_node = doc.createElement("protein")
            protein_node.setAttribute("id", uniprot_acc)
            protein_node.setAttribute("name", info["identifier"])
            protein_node.setAttribute("length", str(info["length"]))
            protein_node.setAttribute("crc64", info["crc64"])

            for feature_acc in sorted(hits):
                feature = features[feature_acc]
                hit = hits[feature_acc]

                match_node = doc.createElement("match")
                attributes = (
                    ("id", feature_acc),
                    ("name", feature["name"]),
                    ("dbname", feature["database"]),
                    ("status", "T"),
                    ("model", feature_acc),
                    ("evd", feature["evidence"]),
                )
                for key, value in attributes:
                    match_node.setAttribute(key, value)

                # Each location is a single fragment: (start, end, feature)
                for pos_start, pos_end, seq_feature in sorted(hit["locations"]):
                    lcn_node = doc.createElement("lcn")
                    lcn_node.setAttribute("start", str(pos_start))
                    lcn_node.setAttribute("end", str(pos_end))

                    if seq_feature:
                        lcn_node.setAttribute("sequence-feature", seq_feature)

                    match_node.appendChild(lcn_node)

                protein_node.appendChild(match_node)

            protein_node.writexml(fh, addindent=" ", newl="\n")