예제 #1
0
def write_sbol(filled_library,
               filled_library_metadata,
               filled_description,
               molecule_type=BIOPAX_DNA,
               part_column="Part Name",
               sequence_column="Sequence",
               description_column="Description (Optional)",
               role_column="Role",
               length_column="length (bp)"):
    """
    Build an SBOL document from the library dataframes and write it to disk.

    One ComponentDefinition and one Sequence are created per row of
    ``filled_library``. The resulting document is also written to
    'SBOL_testcollection.xml' in the current working directory.

    Parameters
    ----------
    filled_library : DATAFRAME
        Dataframe containing the library part data
    filled_library_metadata : DATAFRAME
        Dataframe containing the metadata; the cell at row 0, column 1 is
        used as the document name
    filled_description : DATAFRAME
        Dataframe containing the description data; the string form of its
        values is used as the document description
    molecule_type : STRING, default = BIOPAX_DNA
        Molecule type
    part_column : STRING, default = "Part Name"
        Name of the dataframe column that contains the part names
    sequence_column : STRING, default = "Sequence"
        Name of the dataframe column that contains the sequence
    description_column : STRING, default = "Description (Optional)"
        Name of the dataframe column that contains the library description
    role_column : STRING, default = "Role"
        Name of the dataframe column that contains the Role
    length_column : STRING, default = "length (bp)"
        Name of the dataframe column that contains the sequence length

    Returns
    -------
    doc : SBOL Document
        Document containing all components and sequences

    Notes
    -----
    Sets the global SBOL option 'sbol_typed_uris' to False.
    A warning is logged (no exception is raised) when a cleaned sequence's
    length differs from the value in ``length_column``.

    Example
    -------
    Doc = write_sbol(filled_library, filled_library_metadata, filled_description)

    """
    doc = Document()
    Config.setOption('sbol_typed_uris', False)

    # Parts library: the keyword arguments are used as given. They were
    # previously reset to their defaults here, which silently discarded any
    # caller-supplied value.
    for _, row in filled_library.iterrows():
        part_name = row[part_column]

        component = ComponentDefinition(part_name, molecule_type)
        # The role label from the sheet is translated through the
        # module-level `ontology` mapping.
        component.roles = ontology[row[role_column]]
        component.name = part_name
        description = row[description_column]
        if not pd.isnull(description):
            component.description = description
        doc.addComponentDefinition(component)

        # Normalise the sequence: drop all whitespace, strip any byte-order
        # mark left over from the spreadsheet export, and lower-case it.
        # A local is used so the iterrows() row is not written to.
        raw_sequence = row[sequence_column]
        clean_sequence = "".join(raw_sequence.split())
        clean_sequence = clean_sequence.replace(u"\ufeff", "").lower()

        if len(clean_sequence) != row[length_column]:
            # Report the column actually consulted rather than a fixed name.
            logging.warning(
                "The length of the sequence %s does not coincide with the "
                "length in the corresponding column '%s'",
                part_name, length_column)

        sequence = Sequence(f"{part_name}_sequence",
                            clean_sequence, SBOL_ENCODING_IUPAC)
        doc.addSequence(sequence)
        component.sequences = sequence

    # Metadata
    doc.description = str(filled_description.values)
    doc.name = filled_library_metadata.iloc[0, 1]

    doc.write('SBOL_testcollection.xml')

    return doc
예제 #2
0
    def get_sbol(self, construct_id='BASIC_construct', validate=False):
        """Build an SBOL document describing the construct.

        One ComponentDefinition (with its Sequence) is created per part in
        ``self._parts``, then a ComponentDefinition named ``construct_id``
        is added and assembled from those components in order.

        The returned object is an SBOL document which can be written
        to a file using the "writeString" method.

        Parts sharing the same SBOL id are disambiguated by appending
        ``_0``, ``_1``, ... to that id.

        WARNING: validation needs an internet connection.

        :param construct_id: Construct object ID
        :type construct_id: str
        :param validate: perform online SBOL validation and log the result
        :type validate: bool
        :return: SBOL object
        :rtype: <sbol.Document>
        :raises KeyError: if a part's ``biological_role`` is not one of
            'misc', 'promoter', 'rbs', 'cds', 'ori'
        """

        _SBOL_ROLE_ASSOC = {
            'misc': SO_MISC,
            'promoter': SO_PROMOTER,
            'rbs': SO_RBS,
            'cds': SO_CDS,
            'ori': SO_CIRCULAR
        }

        setHomespace('https://localhost')
        doc = Document()

        # Detect duplicated parts. The key must be the SBOL id, because that
        # is the value looked up in the loop below and used to name the
        # components; keying on the raw ``part.id`` misses duplicates
        # whenever the two identifiers differ.
        seen_ids = set()
        dup_part_ids = set()
        for part in self._parts:
            sbol_id = part.get_sbol_id()
            if sbol_id in seen_ids:
                dup_part_ids.add(sbol_id)
            else:
                seen_ids.add(sbol_id)

        components = []
        for part in self._parts:
            sbol_id = part.get_sbol_id()

            # Deal with duplicated part: take the first numeric suffix that
            # is not already present in the document.
            if sbol_id in dup_part_ids:
                i = 0
                part_id = f'{sbol_id}_{i}'
                while part_id in doc.componentDefinitions:
                    i += 1
                    part_id = f'{sbol_id}_{i}'
            else:
                part_id = sbol_id

            # Build part
            component = ComponentDefinition(part_id)
            component.roles = _SBOL_ROLE_ASSOC[part.biological_role]
            component.sequence = Sequence(part_id, part.seq)
            doc.addComponentDefinition(component)
            components.append(component)

        plasmid = ComponentDefinition(construct_id)
        doc.addComponentDefinition(plasmid)
        plasmid.assemblePrimaryStructure(components)

        if validate:
            logging.info(doc.validate())

        return doc