def test_standard_assembly(self):
    # Assemble four single-base parts into a gene with
    # sbol2.IGEM_STANDARD_ASSEMBLY and check the compiled sequence.
    document = sbol2.Document()
    gene = sbol2.ComponentDefinition("BB0001")

    # One (id, nucleotides, role) entry per part, in assembly order.
    part_specs = (
        ("R0010", 'a', sbol2.SO_PROMOTER),
        ("B0032", 't', sbol2.SO_RBS),
        ("E0040", 'c', sbol2.SO_CDS),
        ("B0012", 'g', sbol2.SO_TERMINATOR),
    )
    parts = [sbol2.ComponentDefinition(spec[0]) for spec in part_specs]
    for part, (part_id, nucleotides, role) in zip(parts, part_specs):
        part.sequence = sbol2.Sequence(part_id)
        part.sequence.elements = nucleotides
        part.roles = role

    # Only the gene is added to the document explicitly.
    document.addComponentDefinition(gene)
    gene.assemblePrimaryStructure(parts, sbol2.IGEM_STANDARD_ASSEMBLY)
    compiled = gene.compile()
    self.assertEqual(compiled, 'atactagagttactagctactagagg')
def test_compile_sequence(self):
    # Compile a gene whose parts are separated by an explicit 'scar'
    # component and check both the returned string and the sequence
    # stored on the gene.
    document = sbol2.Document()
    sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, True)

    gene = sbol2.ComponentDefinition("BB0001")
    promoter = sbol2.ComponentDefinition("R0010")
    # NOTE(review): 'cds' is created with id B0032 and 'rbs' with E0040,
    # while their Sequences below use the opposite ids. Every part holds
    # 'aaa', so the expected result does not depend on it - confirm
    # whether the swap is intentional.
    cds = sbol2.ComponentDefinition("B0032")
    rbs = sbol2.ComponentDefinition("E0040")
    terminator = sbol2.ComponentDefinition("B0012")
    scar = sbol2.ComponentDefinition('scar')

    # (component, sequence id, nucleotides)
    sequence_specs = (
        (promoter, 'R0010', 'aaa'),
        (rbs, 'B0032', 'aaa'),
        (cds, 'E0040', 'aaa'),
        (terminator, 'B0012', 'aaa'),
        (scar, 'scar', 'ttt'),
    )
    for component, sequence_id, _ in sequence_specs:
        component.sequence = sbol2.Sequence(sequence_id)
    for component, _, nucleotides in sequence_specs:
        component.sequence.elements = nucleotides

    document.addComponentDefinition(gene)
    gene.assemblePrimaryStructure(
        [promoter, scar, rbs, scar, cds, scar, terminator])
    compiled = gene.compile()
    self.assertEqual(compiled, 'aaatttaaatttaaatttaaa')
    self.assertEqual(compiled, gene.sequence.elements)
def test_assemble_with_displayIds(self):
    # Assemble a gene by passing part ids (strings) instead of objects,
    # then check the primary structure and the compiled sequence.
    sbol2.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, True)
    document = sbol2.Document()
    gene = sbol2.ComponentDefinition("BB0001")

    # (id, nucleotides, role), in assembly order.
    part_specs = (
        ("R0010", 'a', sbol2.SO_PROMOTER),
        ("B0032", 't', sbol2.SO_RBS),
        ("E0040", 'c', sbol2.SO_CDS),
        ("B0012", 'g', sbol2.SO_TERMINATOR),
    )
    part_ids = [part_id for part_id, _, _ in part_specs]
    parts = [sbol2.ComponentDefinition(part_id) for part_id in part_ids]
    for part, (part_id, nucleotides, role) in zip(parts, part_specs):
        part.sequence = sbol2.Sequence(part_id)
        part.sequence.elements = nucleotides
        part.roles = role

    # All definitions must be in the document so the ids can be resolved.
    document.addComponentDefinition([gene] + parts)
    gene.assemblePrimaryStructure(part_ids)

    assembled_identities = [c.identity for c in gene.getPrimaryStructure()]
    expected_identities = [part.identity for part in parts]
    self.assertEqual(assembled_identities, expected_identities)

    compiled = gene.compile()
    self.assertEqual(compiled, 'atcg')
def test_assembly_multiple_ranges(self):
    # Hierarchical assembly of Components that carry more than one Range
    # is currently unsupported; compile() is expected to raise an
    # SBOLError with the INVALID_ARGUMENT code when this is attempted.
    document = sbol2.Document()
    root = document.componentDefinitions.create('root')
    sub0 = document.componentDefinitions.create('sub0')
    sub1 = document.componentDefinitions.create('sub1')
    sub0.sequence = sbol2.Sequence('sub0', 'tttt')
    sub1.sequence = sbol2.Sequence('sub1', 'aa')
    root.assemblePrimaryStructure([sub0, sub1])

    first_component = root.components['sub0_0']
    second_component = root.components['sub1_0']

    # The first annotation gets two ranges, the second annotation one.
    first_annotation = root.sequenceAnnotations.create('sa0')
    first_annotation.component = first_component
    range_a = first_annotation.locations.createRange('r0')
    range_b = first_annotation.locations.createRange('r1')
    second_annotation = root.sequenceAnnotations.create('sa1')
    second_annotation.component = second_component
    range_c = second_annotation.locations.createRange('r2')

    bounds = ((range_a, 1, 2), (range_b, 3, 4), (range_c, 5, 6))
    for location, start, end in bounds:
        location.start = start
        location.end = end

    with self.assertRaises(sbol2.SBOLError) as caught:
        root.compile()
    self.assertEqual(caught.exception.error_code(),
                     sbol2.SBOLErrorCode.SBOL_ERROR_INVALID_ARGUMENT)
def test_assemble(self):
    # Assemble a gene from part ids and check that the resulting primary
    # structure lists the parts in the requested order.
    document = sbol2.Document()
    gene = sbol2.ComponentDefinition("BB0001")

    # (id, nucleotides, role), in assembly order.
    part_specs = (
        ("R0010", 'a', sbol2.SO_PROMOTER),
        ("B0032", 't', sbol2.SO_RBS),
        ("E0040", 'c', sbol2.SO_CDS),
        ("B0012", 'g', sbol2.SO_TERMINATOR),
    )
    parts = []
    for part_id, _, _ in part_specs:
        parts.append(sbol2.ComponentDefinition(part_id))
    for part, (part_id, nucleotides, role) in zip(parts, part_specs):
        part.sequence = sbol2.Sequence(part_id)
        part.sequence.elements = nucleotides
        part.roles = role

    document.addComponentDefinition([gene, *parts])
    gene.assemblePrimaryStructure([spec[0] for spec in part_specs])

    structure = gene.getPrimaryStructure()
    self.assertEqual([component.identity for component in structure],
                     [part.identity for part in parts])
def test_recursive_compile(self):
    # cd5 is assembled from cd4 and cd3, and cd3 itself from cd1 and cd2.
    # Compiling cd5 must therefore fill in cd3's sequence as well, and
    # the Range locations must match the asserted start/end values.
    document = sbol2.Document()
    names = ('cd1', 'cd2', 'cd3', 'cd4', 'cd5')
    initial_elements = ('tt', 'gg', 'n', 'aa', 'n')

    definitions = [sbol2.ComponentDefinition(name) for name in names]
    for definition, name in zip(definitions, names):
        definition.sequence = sbol2.Sequence(name)
    for definition, nucleotides in zip(definitions, initial_elements):
        definition.sequence.elements = nucleotides
    document.addComponentDefinition(definitions)

    cd1, cd2, cd3, cd4, cd5 = definitions
    cd3.assemblePrimaryStructure([cd1, cd2])
    cd5.assemblePrimaryStructure([cd4, cd3])
    cd5.compile()

    self.assertEqual(cd3.sequence.elements, 'ttgg')
    self.assertEqual(cd5.sequence.elements, 'aattgg')

    # (parent, annotation id, range id, expected start, expected end)
    expectations = (
        (cd3, 'cd1_annotation_0', 'cd1_annotation_0_range', 3, 4),
        (cd3, 'cd2_annotation_0', 'cd2_annotation_0_range', 5, 6),
        (cd5, 'cd4_annotation_0', 'cd4_annotation_0_range', 1, 2),
    )
    # Resolve every range before asserting anything.
    located = [
        parent.sequenceAnnotations[annotation_id].locations[range_id]
        for parent, annotation_id, range_id, _, _ in expectations
    ]
    for location, (_, _, _, start, end) in zip(located, expectations):
        self.assertEqual(location.start, start)
        self.assertEqual(location.end, end)
def testRemoveSequence(self):
    # After removing the only sequence (index 0) from the document,
    # looking it up by its id must raise SBOLError.
    removable = sbol2.Sequence("R0010", "ggctgca")
    document = sbol2.Document()
    document.addSequence(removable)
    document.sequences.remove(0)
    self.assertRaises(sbol2.SBOLError, document.sequences.get, "R0010")
def test_import_into_typed_namespace_from_nontyped_namespace(self):
    # Copy an object created with non-typed, sbol-compliant URIs into a
    # namespace that uses sbol-typed URIs.
    sbol.setHomespace('http://examples.org')
    source_options = (
        (sbol2.ConfigOptions.SBOL_TYPED_URIS, False),
        (sbol2.ConfigOptions.SBOL_COMPLIANT_URIS, True),
    )
    for option, value in source_options:
        sbol.Config.setOption(option, value)

    document = sbol.Document()
    original = sbol.ComponentDefinition('cd')
    sequence = sbol.Sequence('seq')
    original.sequences = sequence.identity
    document.addComponentDefinition(original)
    document.addSequence(sequence)

    # Switch to typed URIs and a new homespace, then import the object
    sbol.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, True)
    previous_homespace = sbol.getHomespace()
    sbol.setHomespace('http://acme.com')
    imported = original.copy(None, previous_homespace)

    # The new namespace must be substituted and the type token added
    self.assertEqual(imported.identity,
                     'http://acme.com/ComponentDefinition/cd/1')
    self.assertEqual(imported.persistentIdentity,
                     'http://acme.com/ComponentDefinition/cd')
    self.assertEqual(imported.sequences[0],
                     'http://acme.com/Sequence/seq/1')

    # The copy must record the original as its wasDerivedFrom source
    self.assertEqual(imported.wasDerivedFrom[0], original.identity)
def testAddSequence(self):
    # A Sequence added to a Document can be fetched back under the id it
    # was created with, and its elements are unchanged.
    added = sbol2.Sequence("R0010", "ggctgca")
    document = sbol2.Document()
    document.addSequence(added)
    fetched = document.sequences.get("R0010")
    self.assertEqual(fetched.elements, 'ggctgca')
def test_import_object_into_new_namespace(self):
    # When an object is copied into a new namespace, its URI must be
    # rewritten into that namespace. Any ReferencedObject attribute whose
    # value points at an object in the old namespace must be rewritten
    # too.
    sbol.setHomespace('http://examples.org')
    sbol.Config.setOption(sbol2.ConfigOptions.SBOL_COMPLIANT_URIS, True)
    sbol.Config.setOption(sbol2.ConfigOptions.SBOL_TYPED_URIS, False)

    document = sbol.Document()
    original = sbol.ComponentDefinition('cd')
    sequence = sbol.Sequence('seq')
    document.addComponentDefinition(original)
    document.addSequence(sequence)
    original.sequences = sequence.identity

    # Import from the old homespace into the new one
    previous_homespace = sbol.getHomespace()
    sbol.setHomespace('http://acme.com')
    imported = original.copy(None, previous_homespace)

    # The new namespace must have been substituted everywhere
    self.assertEqual(imported.identity, 'http://acme.com/cd/1')
    self.assertEqual(imported.persistentIdentity, 'http://acme.com/cd')
    self.assertEqual(imported.sequences[0], 'http://acme.com/seq/1')

    # The copy must record the original as its wasDerivedFrom source
    self.assertEqual(imported.wasDerivedFrom[0], original.identity)

    # The stored reference values must have the same underlying type
    original_type = type(original.properties[sbol.SBOL_SEQUENCE_PROPERTY][0])
    self.assertEqual(original_type, rdflib.URIRef)
    original_type = type(original.properties[sbol.SBOL_SEQUENCE_PROPERTY][0])
    imported_type = type(imported.properties[sbol.SBOL_SEQUENCE_PROPERTY][0])
    self.assertEqual(original_type, imported_type)
def sequence(self):
    """Attach the sequence described by ``self.cell_val`` to ``self.obj``.

    The cell value is handled in one of three ways:

    * a URL is stored unchanged in ``self.obj.sequences``;
    * text made only of letters, whitespace and ``*`` (byte-order marks
      ignored) is normalised, wrapped in a new ``sbol2.Sequence`` that is
      added to ``self.doc``, and linked to ``self.obj``;
    * anything else is stored unchanged and a warning is logged so the
      value can be dealt with in post processing.
    """
    # might need to be careful if the object type is sequence!
    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    if re.fullmatch(url_pattern, self.cell_val):
        # if a url
        self.obj.sequences = self.cell_val
        return

    # U+FEFF is not whitespace as far as ``\s`` is concerned, so it has to
    # be removed *before* the sequence test. Stripping it only afterwards
    # (as was done previously) could never take effect: a value carrying a
    # byte-order mark failed the test and fell through to the warning.
    without_bom = self.cell_val.replace("\ufeff", "")
    if re.match(r'^[a-zA-Z \s*]+$', without_bom):
        # if a sequence string
        # removes spaces, enters, and makes all lower case
        self.cell_val = "".join(without_bom.split()).lower()

        # create sequence object
        seq_obj = sbol2.Sequence(f"{self.obj.displayId}_sequence",
                                 self.cell_val, sbol2.SBOL_ENCODING_IUPAC)
        if self.obj.name is not None:
            seq_obj.name = f"{self.obj.name} Sequence"
        self.doc.addSequence(seq_obj)

        # link sequence object to component definition
        self.obj.sequences = seq_obj
        return

    # Neither a URL nor a recognisable sequence: keep the raw value.
    self.obj.sequences = self.cell_val
    logging.warning(
        f'The cell value for {self.obj.identity} is not an accepted sequence type but it has been left for post processing. Sequence value provided: {self.cell_val} (sheet:{self.sheet}, row:{self.sht_row}, col:{self.sht_col})'
    )
def test_referenced_object(self):
    # Test referenced object property is initialized to correct types
    cd0 = sbol.ComponentDefinition('cd0')
    self.assertEqual(type(cd0.sequences), list)

    c = cd0.components.create('c')
    self.assertEqual(c.definition, None)

    # Test assignment
    cd1 = sbol.ComponentDefinition('cd1')
    c.definition = cd1.identity
    self.assertEqual(c.definition, cd1.identity)

    seq0a = sbol.Sequence('seq0a')
    seq0b = sbol.Sequence('seq0b')
    cd0.sequences = [seq0a.identity, seq0b.identity]
    self.assertEqual(cd0.sequences, [seq0a.identity, seq0b.identity])

    # Assigning the object itself stores its identity
    c.definition = cd1
    self.assertEqual(c.definition, cd1.identity)

    # Test assignment of plain string URIs; values read back as str
    c.definition = str(cd1.identity)
    self.assertEqual(type(c.definition), str)
    cd0.sequences = [str(seq0a.identity), str(seq0b.identity)]
    self.assertEqual([type(s) for s in cd0.sequences], [str, str])

    # Test unset
    c.definition = None
    self.assertEqual(c.definition, None)

    c.definition = cd1.identity
    c.definition = ''
    self.assertEqual(c.definition, None)

    cd0.sequences = []
    self.assertEqual(cd0.sequences, [])

    cd0.sequences = [seq0a.identity, seq0b.identity]
    cd0.sequences = None
    self.assertEqual(cd0.sequences, [])

    # Test that a list of None values is rejected. Only the statement
    # expected to raise lives inside the assertRaises block: the setup
    # assignment must not be able to satisfy the check by accident, and
    # nothing placed after the raising statement inside the block would
    # ever run.
    # NOTE(review): the state of cd0.sequences after the rejected
    # assignment is not asserted here - add a check once the intended
    # post-failure value is confirmed.
    cd0.sequences = [seq0a.identity, seq0b.identity]
    with self.assertRaises(TypeError):
        cd0.sequences = [None, None]
def test_remove_hidden_sequence(self):
    # An object held in a hidden property must not persist once it has
    # been removed from the Document top level.
    sequence_id = 'cd1_sequence'
    document = sbol2.Document()
    definition = document.componentDefinitions.create('cd1')
    definition.sequence = sbol2.Sequence(sequence_id)
    self.assertIn(sequence_id, document.sequences)

    document.sequences.remove(sequence_id)
    self.assertIsNone(definition.sequence)
def test_hidden_property_adder(self):
    # Assigning a TopLevel object to a hidden property (here a Sequence
    # assigned to the sequence property) should also add that object to
    # the Document top level.
    document = sbol2.Document()
    definition = document.componentDefinitions.create('cd')
    definition.sequence = sbol2.Sequence('seq')
    self.assertIsNotNone(definition.sequence)

    # The document must hand back the very same object.
    from_document = document.getSequence(definition.sequence.identity)
    self.assertIs(definition.sequence, from_document)
def test_assembly_multiple_sequenceannotations(self):
    # Hierarchical assembly of Components that have more than one
    # SequenceAnnotation is currently unsupported; compile() is expected
    # to raise an SBOLError with the INVALID_ARGUMENT code.
    document = sbol2.Document()
    definitions = document.componentDefinitions
    root = definitions.create('root')
    sub0 = definitions.create('sub0')
    sub1 = definitions.create('sub1')
    sub0.sequence = sbol2.Sequence('sub0', 'tttt')
    sub1.sequence = sbol2.Sequence('sub1', 'aa')
    root.assemblePrimaryStructure([sub0, sub1])

    # The first compile succeeds.
    root.compile()

    # Attach a second SequenceAnnotation to the sub0 component
    extra_annotation = root.sequenceAnnotations.create('sub0_annotation_1')
    extra_annotation.component = root.components['sub0_0']

    with self.assertRaises(sbol2.SBOLError) as caught:
        root.compile()
    self.assertEqual(caught.exception.error_code(),
                     sbol2.SBOLErrorCode.SBOL_ERROR_INVALID_ARGUMENT)
def test_recursive_add(self):
    # Adding an object to a document must also add all of its child
    # objects.
    definition = sbol2.ComponentDefinition('cd')
    definition.components.add(sbol2.Component('cd_c'))

    # Using cd.sequence is dubious because the sequence attribute is not
    # really part of SBOL 2.3, but it is the case that exposed the bug
    # with recursive addition of objects, so it is kept.
    definition.sequence = sbol2.Sequence('cd_seq')

    document = sbol2.Document()
    document.addComponentDefinition(definition)

    # Expect the definition and its sequence in the document; the
    # component is not top level, so it is not counted.
    self.assertEqual(2, len(document))
def parse_parts_to_sbh(build_request, ontology_terms, parts_doc):
    """Create a ComponentDefinition for every part row of a build request.

    Args:
        build_request: DataFrame whose index contains the labels
            'Part Name' and 'Composite DNA Parts'. The rows from the first
            label through the second form the parts table, and the
            'Part Name' row supplies that table's column headers.
        ontology_terms: pandas object indexed by role name; the first
            value of the matching row is used as the role URI.
        parts_doc: document that receives each component definition via
            ``addComponentDefinition``.
    """
    # parse excel doc for only parts and create parts dataframe
    parts_df = build_request.loc['Part Name':'Composite DNA Parts']
    parts_df.columns = parts_df.iloc[0]
    parts_df = parts_df.drop(parts_df.index[0])
    parts_df = parts_df.loc[parts_df.index.dropna()]
    parts_df = parts_df.drop(['Composite DNA Parts'])

    # iterate over parts dataframe
    for part_name, part_info in parts_df.iterrows():
        part_displayid = '_'.join(part_name.split(' '))

        # create part component definition
        part_cd = sbol.ComponentDefinition(part_displayid, sbol.BIOPAX_DNA)
        part_cd.name = part_name

        # The description column is optional: skip blank cells instead of
        # storing NaN, matching the guards on the fields below.
        description = part_info['Description (Optional)']
        if not pd.isnull(description):
            part_cd.description = description

        # add the role only when one is given and it is a known term
        role = part_info['Role']
        if not pd.isnull(role) and role in ontology_terms.index:
            # grab role uri from ontology term excel sheet
            role_uri = ontology_terms.loc[role].values[0]
            part_cd.roles = part_cd.roles + [role_uri]

        # check if Sequence for part is given
        if not pd.isnull(part_info['Sequence']):
            # add sequence information to part component definition
            part_seq = sbol.Sequence('{}_sequence'.format(part_cd.displayId),
                                     part_info['Sequence'])
            part_cd.sequence = part_seq

        # TODO: Fix adding Source information
        # This doesn't seem to work
        # check if Source for part is given
        if not pd.isnull(part_info['Source (Optional)']):
            # add source information to part component definition
            part_source = part_info['Source (Optional)']
            part_cd.Source = part_source

        parts_doc.addComponentDefinition(part_cd)
def test_hidden_sequence(self):
    # The hidden 'sequence' property must not be serialised when writing
    # SBOL; only the SBOL_SEQUENCE_PROPERTY reference may appear.
    document = sbol2.Document()
    definition = sbol2.ComponentDefinition('cd1', sbol2.BIOPAX_DNA)
    definition.name = 'cd1-name'
    definition.description = 'cd1-description'
    sequence = sbol2.Sequence('cd1_sequence', 'GCAT')
    definition.sequence = sequence
    document.addComponentDefinition(definition)

    # Round-trip the document through RDF/XML into an rdflib graph.
    serialized = document.writeString()
    graph = rdflib.Graph()
    graph.parse(data=serialized, format='xml')

    subject = rdflib.URIRef(definition.identity)

    # No SBOL_SEQUENCE triple may hang off the component definition
    hidden_predicate = rdflib.URIRef(sbol2.SBOL_SEQUENCE)
    hidden_matches = list(graph.triples((subject, hidden_predicate, None)))
    self.assertEqual([], hidden_matches)

    # Exactly one SBOL_SEQUENCE_PROPERTY triple, pointing at the sequence
    visible_pattern = (subject,
                       rdflib.URIRef(sbol2.SBOL_SEQUENCE_PROPERTY),
                       None)
    visible_matches = list(graph.triples(visible_pattern))
    self.assertEqual(len(visible_matches), 1)
    expected_triple = (subject,
                       rdflib.URIRef(sbol2.SBOL_SEQUENCE_PROPERTY),
                       rdflib.URIRef(sequence.identity))
    self.assertEqual(visible_matches[0], expected_triple)
def test_bool(self):
    # A default-constructed Sequence must be truthy.
    self.assertTrue(sbol2.Sequence())
def test_sequence_validation(self):
    # The 'sequence' and 'sequences' properties must stay in sync:
    # assigning the former makes the latter list that sequence's identity.
    definition = sbol2.ComponentDefinition('cd1', sbol2.BIOPAX_DNA)
    definition.sequence = sbol2.Sequence('cd1_sequence', 'GCAT')
    expected_references = [definition.sequence.identity]
    self.assertEqual(expected_references, definition.sequences)