def write_sbol_comp(libraries, compositions, all_parts):
    """
    the function pulls the parts from their SynBioHub libraries and compiles
    them into an SBOL document

    Every part in ``all_parts`` is requested from every library in
    ``libraries``; a failed pull is reported on stdout and otherwise ignored.
    One collection is then created per key of ``compositions`` and one
    composite component definition per design inside it.

    Parameters
    ----------
    libraries : DICT
        Dictionary containing the libraries, the keys are the library
        abbreviations and the corresponding value is the url, if no
        abbreviation is used, the url is used for both key and value
    compositions : DICT
        Nested dictionary containing the collection names and the
        corresponding values are the names of the composite design which are
        also dictionary containing the design descriptions ("Description")
        and the parts ("Parts")
    all_parts : SET
        A set containing all parts that are used in the filled template

    Returns
    -------
    doc: SBOL Document
        Document containing all components and sequences

    Example
    -------
    cwd = os.path.dirname(os.path.abspath("__file__")) #get current working directory
    path_filled = os.path.join(cwd, "darpa_template.xlsx")
    table = pd.read_excel (path_filled)
    compositions, list_of_rows = get_data(table)
    compositions, all_parts = get_parts(list_of_rows, table, compositions)
    compositions = check_name(compositions)
    doc = write_sbol_comp(libraries, compositions, all_parts)
    """
    doc = Document()
    sbol2.Config.setOption('sbol_typed_uris', False)

    # Try every part against every library; a miss is expected and only
    # reported, because a part normally lives in just one of the libraries.
    for library in libraries:
        sbol_library = sbol2.PartShop(libraries[library])
        for part in all_parts:
            try:
                sbol_library.pull(part, doc)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit still propagate.
                print(f"The part {part} was not in library {library}")

    for collection, designs in compositions.items():
        print(collection)
        coll = Collection(collection)
        doc.addCollection(coll)

        for design, details in designs.items():
            composite_design = doc.componentDefinitions.create(design)
            composite_design.assemblePrimaryStructure(details["Parts"])
            composite_design.compile()
            # NOTE(review): bare attribute read retained from the original;
            # its result is unused - confirm whether it can be dropped.
            composite_design.sequence

            # Only string descriptions are copied over; any other value
            # (e.g. an empty spreadsheet cell) is skipped.
            description = details["Description"]
            if isinstance(description, str):
                composite_design.description = description

            coll.members += [composite_design.identity]

    return doc
class SBOLOutput(SuperGSLFunction):
    """Generate SBOL document containing the assemblies."""

    name = 'sbol'

    # Maps every character that is replaced in identifiers to '_'.
    _BAD_CHAR_TABLE = str.maketrans('[]~:', '____')

    def get_arguments(self):
        """Return the (name, type) pairs of the arguments this function takes."""
        return [('filename', str), ('assemblies', list)]

    def get_return_type(self):
        """Return the type produced by this function (``NoneType``)."""
        return type(None)

    def sanitize_identifier(self, identifier):
        """Sanitize SuperGSL Identifiers to conform to identifiers in the SBOL spec.

        Each of the characters ``[``, ``]``, ``~`` and ``:`` is replaced by
        an underscore; everything else is left untouched.
        """
        return identifier.translate(self._BAD_CHAR_TABLE)

    def handle_assembly(self, assembly):
        """Add each assembly to the SBOL Document.

        Creates a ComponentDefinition for ``assembly``, one more for each of
        its parts, and assembles the parts into the assembly's primary
        structure. ``self.sbol_doc`` and ``self.assembly_count`` must have
        been initialised (see ``execute``).
        """
        self.assembly_count += 1

        # The original body read ``node.label`` / ``node.parts`` although no
        # ``node`` is defined in this method; the object being handled is the
        # ``assembly`` argument.
        # NOTE(review): assumes ``assembly`` exposes ``label`` and ``parts``
        # (each item having a ``part`` attribute) - confirm against the caller.
        label = assembly.label
        if not label:
            # Unlabelled assemblies get a sequential placeholder name.
            label = 'Assembly%05d' % self.assembly_count

        # Kept under a separate name so the input ``assembly`` is not shadowed.
        assembly_definition = ComponentDefinition(label)
        self.sbol_doc.addComponentDefinition(assembly_definition)

        part_components = []
        for part_node in assembly.parts:
            part = part_node.part
            sanitized_ident = self.sanitize_identifier(part.identifier)

            part_component = ComponentDefinition(sanitized_ident)
            part_component.roles = part.roles
            part_component.sequence = Sequence(
                sanitized_ident, str(part.get_sequence().seq))
            part_components.append(part_component)

        assembly_definition.assemblePrimaryStructure(part_components)
        assembly_definition.compile()

    def execute(self, params: dict):
        """Initialize the SBOL Document.

        Configures pysbol, converts every assembly found in ``params[0]``
        and writes the resulting document to ``output_sbol.xml``.
        """
        setHomespace('http://sbols.org/SuperGSL_Example/')
        Config.setOption('sbol_compliant_uris', True)
        Config.setOption('sbol_typed_uris', True)

        self.sbol_doc = Document()
        self.assembly_count = 0

        # NOTE(review): ``params`` is annotated as a dict but indexed with 0,
        # and the declared 'filename' argument is not used for the output
        # path - confirm the intended calling convention.
        assembly_list: AssemblyResultSet = params[0]
        for assembly in assembly_list:
            self.handle_assembly(assembly)

        self.sbol_doc.write('output_sbol.xml')
def execute(self, params: dict):
    """Build the SBOL document for the supplied assemblies and write it out.

    Sets the pysbol homespace and URI options, resets ``self.sbol_doc`` and
    ``self.assembly_count``, passes every item of ``params[0]`` to
    ``self.handle_assembly`` and finally writes the document to
    ``output_sbol.xml``.
    """
    setHomespace('http://sbols.org/SuperGSL_Example/')
    for option_name in ('sbol_compliant_uris', 'sbol_typed_uris'):
        Config.setOption(option_name, True)

    # Fresh state for this run.
    self.sbol_doc = Document()
    self.assembly_count = 0

    # NOTE(review): presumably an AssemblyResultSet - verify against caller.
    for current_assembly in params[0]:
        self.handle_assembly(current_assembly)

    self.sbol_doc.write('output_sbol.xml')
def import_sbol_parts(path: str, add_i_seqs=False
                      ) -> Generator[bsb.BasicPart, None, None]:
    """Imports BasicPart objects using sbol2.Document.exportToFormat.

    The SBOL document is exported to a temporary GenBank file, which is
    parsed with Bio.SeqIO and removed again before any part is yielded.

    Note:
        Refer to Biopython documentation for further information on
        Bio.SeqIO.parse(). Refer to pysbol2 documentation for further
        information.

    Args:
        path: path to SBOL file.
        add_i_seqs: forwarded unchanged to seqrec2part for every record.

    Yields:
        BasicPart: one object per sequence record in the exported file.
    """
    doc = Document(path)

    # Only the file *name* is needed: close the handle right away so the
    # exporter can reopen the path on platforms that forbid a second open.
    fp = tempfile.NamedTemporaryFile(delete=False)
    fp.close()
    try:
        doc.exportToFormat("GenBank", fp.name)
        # SeqIO.parse is lazy; read all records while the file still exists.
        seqrecs = list(SeqIO.parse(fp.name, "genbank"))
    finally:
        # Always clean up, including when the export or the parse fails.
        os.unlink(fp.name)

    yield from (seqrec2part(seqrec, add_i_seqs) for seqrec in seqrecs)
def retrieve_part_details(self, identifier: str) -> dict:
    """Pull one part from the remote repository and summarise it.

    A PartShop is opened on ``self.repository_url``; it is logged in only
    when both ``self.repository_username`` and ``self.repository_password``
    are set. The part is pulled into a fresh document and its component
    definition is looked up by ``identifier``.

    Returns a dict with the keys ``'roles'``, ``'description'`` and
    ``'sequence'`` (the compiled sequence wrapped in ``Seq``).
    """
    pulled_doc = Document()
    shop = PartShop(self.repository_url)

    username = self.repository_username
    if username and self.repository_password:
        shop.login(username, self.repository_password)

    shop.pull(identifier, pulled_doc)
    definition = pulled_doc.componentDefinitions[identifier]

    details = {}
    details['roles'] = definition.roles
    details['description'] = definition.description
    details['sequence'] = Seq(definition.compile())
    return details
def write_sbol(filled_library, filled_library_metadata, filled_description,
               molecule_type=BIOPAX_DNA, part_column="Part Name",
               sequence_column="Sequence",
               description_column="Description (Optional)",
               role_column="Role", length_column="length (bp)"):
    """
    the function reads an excel spreadsheet and converts it into valid sbol

    One component definition and one sequence are added per row of
    ``filled_library``. The finished document is also written to
    ``SBOL_testcollection.xml`` in the current working directory.

    Parameters
    ----------
    filled_library : DATAFRAME
        Dataframe containing the library part data
    filled_library_metadata : DATAFRAME
        Dataframe containing the metadata; the cell at row 0, column 1 is
        used as the document name
    filled_description : DATAFRAME
        Dataframe containing the description data
    molecule_type : STRING, default = BIOPAX_DNA
        Molecule type
    part_column : STRING, default = "Part Name"
        Name of the dataframe column that contains the part names
    sequence_column : STRING, default = "Sequence"
        Name of the dataframe column that contains the sequence
    description_column : STRING, default = "Description (Optional)"
        Name of the dataframe column that contains the library description
    role_column : STRING, default = "Role"
        Name of the dataframe column that contains the Role
    length_column : STRING, default = "length (bp)"
        Name of the dataframe column that contains the sequence length

    Returns
    -------
    doc: SBOL Document
        Document containing all components and sequences

    Example
    -------
    Doc = write_sbol(filled_library, filled_library_metadata, filled_description)
    """
    doc = Document()
    Config.setOption('sbol_typed_uris', False)

    # The keyword arguments are used as passed in. Previously each one was
    # overwritten with its default inside the body, so callers could not
    # actually change the molecule type or the column names.

    # Parts Library
    for _, row in filled_library.iterrows():
        part_name = row[part_column]

        component = ComponentDefinition(part_name, molecule_type)
        component.roles = ontology[row[role_column]]
        component.name = part_name
        if not pd.isnull(row[description_column]):
            component.description = row[description_column]
        doc.addComponentDefinition(component)

        # Normalise the sequence in a local variable instead of writing back
        # into the row handed out by iterrows(): drop all whitespace, strip
        # byte-order marks and lower-case the result.
        sequence_text = "".join(row[sequence_column].split())
        sequence_text = sequence_text.replace(u"\ufeff", "").lower()

        if len(sequence_text) != row[length_column]:
            logging.warning(
                f"The length of the sequence {part_name} does not coincide "
                f"with the length in the corresponding column "
                f"'{length_column}'"
            )

        sequence = Sequence(f"{part_name}_sequence", sequence_text,
                            SBOL_ENCODING_IUPAC)
        doc.addSequence(sequence)
        component.sequences = sequence

    # Metadata
    doc.description = str(filled_description.values)
    doc.name = filled_library_metadata.iloc[0, 1]

    doc.write('SBOL_testcollection.xml')
    return doc
def get_sbol_document(sbol_string):
    """Return an SBOL Document built from base64-encoded SBOL content.

    ``sbol_string`` is base64-decoded and the decoded payload is loaded into
    a new Document via ``appendString`` with ``overwrite=True``.
    """
    decoded_payload = base64.b64decode(sbol_string)

    document = Document()
    document.appendString(sbol_str=decoded_payload, overwrite=True)
    return document
def get_sbol(self, construct_id='BASIC_construct', validate=False):
    """Get the SBOL string representation of the construct.

    The object outputted is SBOL document which can be written to a file
    using the "writeString" method.

    Each part of the construct becomes a component definition with a role
    and a sequence. Parts whose SBOL id occurs more than once receive a
    numeric suffix (``_0``, ``_1``, ...) so that every component id in the
    document is unique. All components are then assembled, in order, into
    one component definition named ``construct_id``.

    WARNING: validation needs internet connexion.

    :param construct_id: Construct object ID
    :type construct_id: str
    :param validate: perform online SBOL validation
    :type validate: bool
    :return: SBOL object
    :rtype: <sbol.Document>
    :raises KeyError: if a part has a biological role that is not mapped
        to an SBOL role
    """
    _SBOL_ROLE_ASSOC = {
        'misc': SO_MISC,
        'promoter': SO_PROMOTER,
        'rbs': SO_RBS,
        'cds': SO_CDS,
        'ori': SO_CIRCULAR,
    }

    setHomespace('https://localhost')
    doc = Document()

    # Check duplicated part.
    # Duplicates are detected on the SBOL id, i.e. the same value that is
    # used for the lookup below; previously they were collected by
    # ``part.id`` and then tested against ``part.get_sbol_id()``.
    sbol_ids = [part.get_sbol_id() for part in self._parts]
    seen_ids = set()
    dup_part_ids = set()
    for sbol_id in sbol_ids:
        if sbol_id in seen_ids:
            dup_part_ids.add(sbol_id)
        else:
            seen_ids.add(sbol_id)

    used_ids = set()  # ids already given to a component in this document
    components = []
    for part, sbol_id in zip(self._parts, sbol_ids):
        # Deal with duplicated part: take the first free numeric suffix.
        # An id that is already taken (e.g. by an earlier suffixed
        # duplicate) is handled the same way.
        if sbol_id in dup_part_ids or sbol_id in used_ids:
            i = 0
            part_id = f'{sbol_id}_{i}'
            while part_id in used_ids:
                i += 1
                part_id = f'{sbol_id}_{i}'
        else:
            part_id = sbol_id
        used_ids.add(part_id)

        # Build part
        component = ComponentDefinition(part_id)
        component.roles = _SBOL_ROLE_ASSOC[part.biological_role]
        component.sequence = Sequence(part_id, part.seq)
        doc.addComponentDefinition(component)
        components.append(component)

    plasmid = ComponentDefinition(construct_id)
    doc.addComponentDefinition(plasmid)
    plasmid.assemblePrimaryStructure(components)

    if validate:
        logging.info(doc.validate())
    return doc