def write_sbol(filled_library, filled_library_metadata, filled_description,
               molecule_type=BIOPAX_DNA, part_column="Part Name",
               sequence_column="Sequence",
               description_column="Description (Optional)",
               role_column="Role", length_column="length (bp)"):
    """Convert a parts-library dataframe into an SBOL document.

    One ComponentDefinition and one Sequence are created per row of
    ``filled_library`` and added to a new Document. The document is also
    written to the relative path ``SBOL_testcollection.xml`` before being
    returned.

    Parameters
    ----------
    filled_library : DATAFRAME
        Dataframe containing the library part data, one part per row
    filled_library_metadata : DATAFRAME
        Dataframe containing the metadata; the cell at row 0, column 1 is
        used as the document name
    filled_description : DATAFRAME
        Dataframe containing the description data; the string form of its
        ``values`` is used as the document description
    molecule_type : STRING, default = BIOPAX_DNA
        Molecule type passed to every ComponentDefinition
    part_column : STRING, default = "Part Name"
        Name of the dataframe column that contains the part names
    sequence_column : STRING, default = "Sequence"
        Name of the dataframe column that contains the sequence
    description_column : STRING, default = "Description (Optional)"
        Name of the dataframe column that contains the part description
    role_column : STRING, default = "Role"
        Name of the dataframe column that contains the Role; the value is
        used as a key into the ``ontology`` lookup defined outside this
        function
    length_column : STRING, default = "length (bp)"
        Name of the dataframe column that contains the sequence length

    Returns
    -------
    doc: SBOL Document
        Document containing all components and sequences

    Example
    -------
    Doc = write_sbol(filled_library, filled_library_metadata, filled_description)
    """
    doc = Document()
    Config.setOption('sbol_typed_uris', False)

    # NOTE: the column-name and molecule-type arguments are used as passed.
    # They were previously overwritten with their defaults at this point,
    # which silently discarded any caller-supplied value.

    # Parts library: one component plus one sequence per row.
    for _, row in filled_library.iterrows():
        part_name = row[part_column]

        component = ComponentDefinition(part_name, molecule_type)
        component.roles = ontology[row[role_column]]
        component.name = part_name
        description = row[description_column]
        if not pd.isnull(description):
            component.description = description
        doc.addComponentDefinition(component)

        # Normalise the sequence into a local variable (rather than writing
        # back into the iterated row): drop all whitespace, strip any
        # byte-order mark, and lowercase.
        nucleotides = "".join(row[sequence_column].split())
        nucleotides = nucleotides.replace(u"\ufeff", "")
        nucleotides = nucleotides.lower()

        # A mismatch with the declared length is reported but not fatal.
        if len(nucleotides) != row[length_column]:
            logging.warning(
                f"The length of the sequence {part_name} does not coincide "
                f"with the length in the corresponding column "
                f"'{length_column}'"
            )

        sequence = Sequence(f"{part_name}_sequence", nucleotides,
                            SBOL_ENCODING_IUPAC)
        doc.addSequence(sequence)
        component.sequences = sequence

    # Metadata
    doc.description = str(filled_description.values)
    doc.name = filled_library_metadata.iloc[0, 1]

    doc.write('SBOL_testcollection.xml')
    return doc
def get_sbol(self, construct_id='BASIC_construct', validate=False):
    """Get the SBOL document representation of the construct.

    One ComponentDefinition (with an attached Sequence) is created for each
    part in ``self._parts``; a further ComponentDefinition named
    ``construct_id`` is then assembled from them in order. Parts that share
    an SBOL id are disambiguated with a numeric ``_<n>`` suffix.

    The object outputted is an SBOL document which can be written to a file
    using the "writeString" method.

    WARNING: validation needs internet connection.

    :param construct_id: Construct object ID
    :type construct_id: str
    :param validate: perform online SBOL validation and log the result
    :type validate: bool
    :return: SBOL object
    :rtype: <sbol.Document>
    :raises KeyError: if a part's ``biological_role`` is not one of
        'misc', 'promoter', 'rbs', 'cds' or 'ori'
    """
    _SBOL_ROLE_ASSOC = {
        'misc': SO_MISC,
        'promoter': SO_PROMOTER,
        'rbs': SO_RBS,
        'cds': SO_CDS,
        'ori': SO_CIRCULAR,
    }

    setHomespace('https://localhost')
    doc = Document()

    # Find the SBOL ids used by more than one part. The same identifier
    # (get_sbol_id) is used here and in the lookup below so that duplicate
    # detection and renaming always agree.
    seen_ids = set()
    dup_part_ids = set()
    for part in self._parts:
        sbol_id = part.get_sbol_id()
        if sbol_id in seen_ids:
            dup_part_ids.add(sbol_id)
        else:
            seen_ids.add(sbol_id)

    # Ids already taken in the document. Pre-seeded with the ids of the
    # non-duplicated parts so a generated "<id>_<n>" can never collide with
    # a part whose own id happens to have that form.
    used_ids = seen_ids - dup_part_ids

    components = []
    for part in self._parts:
        base_id = part.get_sbol_id()
        if base_id in dup_part_ids:
            # Duplicated part: take the first free numeric suffix.
            i = 0
            part_id = f'{base_id}_{i}'
            while part_id in used_ids:
                i += 1
                part_id = f'{base_id}_{i}'
            used_ids.add(part_id)
        else:
            part_id = base_id

        # Build part
        component = ComponentDefinition(part_id)
        component.roles = _SBOL_ROLE_ASSOC[part.biological_role]
        component.sequence = Sequence(part_id, part.seq)
        doc.addComponentDefinition(component)
        components.append(component)

    plasmid = ComponentDefinition(construct_id)
    doc.addComponentDefinition(plasmid)
    plasmid.assemblePrimaryStructure(components)

    if validate:
        logging.info(doc.validate())
    return doc