def test_list_wrapping(self):
    """Check that SubComponent wraps singleton location arguments in lists.

    Ensure that at least certain properties handle automatic list
    wrapping and are typed to do so.
    See https://github.com/SynBioDex/pySBOL3/issues/301
    """
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    instance_uri = 'https://example.org/instance'
    seq1 = sbol3.Sequence('seq1')
    test_loc = sbol3.EntireSequence(seq1)
    seq2 = sbol3.Sequence('seq2')
    # Build the source location from the second sequence so that the two
    # locations are distinguishable and `seq2` is actually exercised.
    test_source_loc = sbol3.EntireSequence(seq2)
    subcomp1 = sbol3.SubComponent(instance_of=instance_uri,
                                  locations=test_loc,
                                  source_locations=test_source_loc)
    self.assertEqual([test_loc], subcomp1.locations)
    self.assertEqual([test_source_loc], subcomp1.source_locations)
def compute_sequence(component: sbol3.Component) -> sbol3.Sequence:
    """Compute the sequence of a component and add this information into the Component in place

    The sub-components are visited in the order returned by
    ``order_subcomponents``; each one receives a ``Range`` location on the
    newly built sequence, and its elements are appended to that sequence.

    :param component: Component whose sequence is to be computed
    :return: Sequence that has been computed
    """
    # Only the ordering is needed; the second returned value is unused here.
    ordered, _ = order_subcomponents(component)
    # make the blank sequence
    sequence = sbol3.Sequence(
        component.display_id + "_sequence",
        encoding='https://identifiers.org/edam:format_1207')
    # ### BUG: pySBOL #185
    sequence.elements = ''  # Should be in keywords, except pySBOL3 #208
    # for each component in turn, add it and set its location
    for subcomponent in ordered:
        subc = subcomponent.instance_of.lookup()
        assert len(subc.sequences) == 1
        subseq = subc.sequences[0].lookup()
        assert sequence.encoding == subseq.encoding
        # Ranges are 1-based and inclusive, so the new span starts one past
        # the current end of the accumulated sequence.
        offset = len(sequence.elements)
        subcomponent.locations.append(
            sbol3.Range(sequence, offset + 1, offset + len(subseq.elements)))
        sequence.elements += subseq.elements
    # when all have been handled, the sequence is fully realized
    component.document.add(sequence)
    component.sequences.append(sequence)
    return sequence
def test_cloning_with_children(self):
    """Cloning a Component must also clone its child objects.

    `sbol3.set_namespace` is deliberately not called here, unlike the
    other cloning unit tests, to verify that cloning does not rely on
    the default namespace.
    """
    document = sbol3.Document()
    base = 'https://github.com/synbiodex/pysbol3'
    c1_uri, c2_uri, s1_uri = (posixpath.join(base, leaf)
                              for leaf in ('c1', 'c2', 's1'))

    original = sbol3.Component(c1_uri, sbol3.SBO_DNA)
    document.add(original)
    sequence = sbol3.Sequence(s1_uri)
    document.add(sequence)
    original.sequences.append(sequence)
    sub_original = sbol3.SubComponent(original)
    loc_original = sbol3.EntireSequence(sequence)
    sub_original.source_locations.append(loc_original)
    original.features = [sub_original]

    duplicate = original.clone(c2_uri)
    self.assertEqual(c2_uri, duplicate.identity)
    self.assertIsNone(duplicate.document)

    # The cloned SubComponent gets a new identity under the clone
    sub_duplicate = duplicate.features[0]
    self.assertIsInstance(sub_duplicate, sbol3.SubComponent)
    self.assertNotEqual(sub_original.identity, sub_duplicate.identity)
    self.assertTrue(sub_duplicate.identity.startswith(duplicate.identity))
    # The self-reference must now point at the clone
    self.assertEqual(duplicate.identity, sub_duplicate.instance_of)
    self.assertIsNone(sub_duplicate.document)

    # The nested location is cloned too, but still refers to the sequence
    loc_duplicate = sub_duplicate.source_locations[0]
    self.assertIsInstance(loc_duplicate, sbol3.EntireSequence)
    self.assertNotEqual(loc_original.identity, loc_duplicate.identity)
    self.assertTrue(loc_duplicate.identity.startswith(duplicate.identity))
    self.assertEqual(loc_original.sequence, loc_duplicate.sequence)
    self.assertIsNone(loc_duplicate.document)
def test_full_constructor(self):
    """Every Sequence constructor argument is readable back unchanged."""
    # Insertion order matters: it fixes both the keyword order passed to
    # the constructor and the order in which attributes are checked.
    expected = {
        'identity': 'https://github.com/synbiodex/pysbol3/s1',
        'elements': 'GCAT',
        'encoding': sbol3.IUPAC_DNA_ENCODING,
        'attachments': ['https://github.com/synbiodex/pysbol3/attachment1'],
        'name': None,
        'description': None,
        'derived_from': ['https://github.com/synbiodex/pysbol3/parent1'],
        'generated_by': ['https://github.com/synbiodex/pysbol3/tool1'],
        'measures': [sbol3.Measure(value=2.3, unit='meter')],
    }
    s1 = sbol3.Sequence(**expected)
    for attribute, value in expected.items():
        self.assertEqual(value, getattr(s1, attribute))
def sequence(self):
    """Attach the cell value to ``self.obj.sequences``.

    Three cases are handled, in this order:

    * the cell value is an http(s) URL: it is stored as-is;
    * the cell value consists only of letters, whitespace and ``*``: it is
      cleaned, wrapped in a new ``sbol3.Sequence`` added to ``self.doc``,
      and that object is stored;
    * anything else: a warning is logged and the raw value is stored.
    """
    # might need to be careful if the object type is sequence!
    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    if re.fullmatch(url_pattern, self.cell_val):
        # a URL is kept verbatim as the reference
        self.obj.sequences = [self.cell_val]
        return

    if not re.match(r'^[a-zA-Z \s*]+$', self.cell_val):
        # neither a URL nor a plain sequence string
        logging.warning(
            f'The cell value for {self.obj.identity} is not an accepted sequence type, it has been added as a uri and left for post processing. Sequence value provided: {self.cell_val} (sheet:{self.sheet}, row:{self.sht_row}, col:{self.sht_col})'
        )
        self.obj.sequences = [self.cell_val]
        return

    # a sequence string: drop all whitespace, strip any byte-order mark,
    # and lower-case the result
    self.cell_val = "".join(self.cell_val.split()).replace(u"\ufeff", "").lower()

    # build the sequence object and register it with the document
    seq_obj = sbol3.Sequence(f"{self.obj.displayId}_sequence",
                             elements=self.cell_val)
    if self.obj.name is not None:
        seq_obj.name = f"{self.obj.name} Sequence"
    self.doc.add(seq_obj)

    # link the sequence object to the component definition
    self.obj.sequences = [seq_obj]
def test_list_wrapping(self):
    """Component wraps singleton constructor arguments in lists.

    Ensure that at least certain properties handle automatic list
    wrapping and are typed to do so.
    See https://github.com/SynBioDex/pySBOL3/issues/301
    """
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    source_uri = 'https://example.org/source'
    derived_from_uri = 'https://example.org/derived_from'
    statute_mile = sbol3.OM_NS + 'mile-Statute'

    dna_type = sbol3.SBO_DNA
    promoter_role = sbol3.SO_PROMOTER
    sequence = sbol3.Sequence('seq1')
    model = sbol3.Model('model1',
                        source=source_uri,
                        language='https://example.org/language',
                        framework='https://example.org/framework')
    attachment = sbol3.Attachment('att1', source=source_uri)
    measure = sbol3.Measure(value=26.2, unit=statute_mile)
    activity = sbol3.Activity('activity1')

    comp1 = sbol3.Component('comp1',
                            types=dna_type,
                            sequences=sequence,
                            roles=promoter_role,
                            models=model,
                            attachments=attachment,
                            derived_from=derived_from_uri,
                            measures=measure,
                            generated_by=activity)

    # Each property should hold exactly the one value it was given;
    # referenced top-level objects are compared by identity URI.
    expectations = (
        ('types', dna_type),
        ('sequences', sequence.identity),
        ('roles', promoter_role),
        ('models', model.identity),
        ('attachments', attachment.identity),
        ('derived_from', derived_from_uri),
        ('measures', measure),
        ('generated_by', activity.identity),
    )
    for attribute, member in expectations:
        self.assertEqual([member], getattr(comp1, attribute))
def test_singleton_wrapping_urls(self):
    """A single URL given as `derived_from` ends up as a one-item list.

    See https://github.com/SynBioDex/pySBOL3/issues/301
    """
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    origin_uri = 'https://example.com/thing'
    sequence = sbol3.Sequence('thing1', derived_from=origin_uri)
    self.assertEqual(1, len(sequence.derived_from))
    self.assertEqual(origin_uri, sequence.derived_from[0])
def test_create(self):
    """A Sequence built from just a display id has no elements or encoding."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    name = 'seq1'
    created = sbol3.Sequence(name)
    self.assertIsNotNone(created)
    self.assertEqual(name, created.display_id)
    self.assertIsNone(created.elements)
    self.assertIsNone(created.encoding)
def test_initial_value(self):
    """An empty `elements` string passed to the constructor is preserved.

    See https://github.com/SynBioDex/pySBOL3/issues/208
    """
    uri = 'https://github.com/synbiodex/pysbol3/s1'
    empty_elements = ''
    s1 = sbol3.Sequence(identity=uri, elements=empty_elements)
    self.assertEqual(uri, s1.identity)
    self.assertEqual(empty_elements, s1.elements)
def test_invalid(self):
    """Elements without an encoding produce exactly one validation error."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    name = 'seq1'
    candidate = sbol3.Sequence(name)
    self.assertIsNotNone(candidate)
    candidate.elements = 'actg'
    outcome = candidate.validate()
    self.assertIsNotNone(outcome)
    self.assertEqual(1, len(outcome.errors))
def test_list_wrapping(self):
    """SequenceFeature wraps a singleton `locations` argument in a list.

    Ensure that at least certain properties handle automatic list
    wrapping and are typed to do so.
    See https://github.com/SynBioDex/pySBOL3/issues/301
    """
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    sequence = sbol3.Sequence('seq1')
    location = sbol3.EntireSequence(sequence)
    feature = sbol3.SequenceFeature(locations=location)
    self.assertEqual([location], feature.locations)
def test_valid(self):
    """Elements plus a DNA encoding yield an empty validation report."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    name = 'seq1'
    candidate = sbol3.Sequence(name)
    self.assertIsNotNone(candidate)
    candidate.elements = 'actg'
    candidate.encoding = sbol3.IUPAC_DNA_ENCODING
    # Validation should not raise a ValidationError
    outcome = candidate.validate()
    self.assertEqual(0, len(outcome))
def test_add(self):
    """Document.add rejects an Interface but accepts a Sequence."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    document = sbol3.Document()
    interface = sbol3.Interface()
    # Adding the Interface is expected to raise TypeError
    with self.assertRaises(TypeError):
        document.add(interface)
    sequence = sbol3.Sequence('seq1')
    document.add(sequence)
    found = document.find(sequence.identity)
    self.assertEqual(sequence.identity, found.identity)
def test_cloning_with_references(self):
    """A cloned Component keeps the same sequence references."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    original = sbol3.Component('c1', sbol3.SBO_DNA)
    clone_id = 'c2'
    sequence = sbol3.Sequence('s1')
    original.sequences.append(sequence)
    duplicate = original.clone(clone_id)
    # The clone's identity is the default namespace joined with the new id
    expected_identity = posixpath.join(sbol3.get_namespace(), clone_id)
    self.assertEqual(expected_identity, duplicate.identity)
    self.assertListEqual(list(original.sequences), list(duplicate.sequences))
def test_list_wrapping(self):
    """LocalSubComponent wraps singleton `types` and `locations` in lists.

    Ensure that at least certain properties handle automatic list
    wrapping and are typed to do so.
    See https://github.com/SynBioDex/pySBOL3/issues/301
    """
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    dna_type = sbol3.SBO_DNA
    sequence = sbol3.Sequence('seq1')
    location = sbol3.EntireSequence(sequence)
    local_sub = sbol3.LocalSubComponent(types=dna_type, locations=location)
    self.assertEqual([dna_type], local_sub.types)
    self.assertEqual([location], local_sub.locations)
def test_generated_by(self):
    """`generated_by` accepts nothing, a list, or a single Activity.

    See https://github.com/SynBioDex/pySBOL3/issues/301
    """
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    first_activity = sbol3.Activity('act1')
    second_activity = sbol3.Activity('act2')
    bases = 'acgt'

    # No argument at all: the property reads back as empty
    bare = sbol3.Sequence(identity='seq1', elements=bases)
    self.assertListEqual([], list(bare.generated_by))

    # A list of activities is stored as their identities
    both = [first_activity, second_activity]
    from_list = sbol3.Sequence(identity='seq2', elements=bases,
                               generated_by=both)
    self.assertListEqual([item.identity for item in both],
                         list(from_list.generated_by))

    # A singleton should gracefully be marshalled into a list
    single = first_activity
    from_single = sbol3.Sequence(identity='seq3', elements=bases,
                                 generated_by=single)
    self.assertListEqual([single.identity], list(from_single.generated_by))
def test_list_wrapping(self):
    """VariableFeature wraps a singleton `variant_collections` in a list.

    Ensure that at least certain properties handle automatic list
    wrapping and are typed to do so.
    See https://github.com/SynBioDex/pySBOL3/issues/301
    """
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    # The Sequence/EntireSequence objects the original built here were
    # never used by the assertion, so they have been dropped.
    variable_uri = 'https://example.org/variable'
    var_coll_uri = 'https://example.org/collection'
    var_feat1 = sbol3.VariableFeature(cardinality=sbol3.SBOL_ZERO_OR_MORE,
                                      variable=variable_uri,
                                      variant_collections=var_coll_uri)
    self.assertEqual([var_coll_uri], var_feat1.variant_collections)
def test_add(self):
    """Document.add rejects an Interface and returns the object it added."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    document = sbol3.Document()
    interface = sbol3.Interface()
    # Adding the Interface is expected to raise TypeError
    with self.assertRaises(TypeError):
        document.add(interface)
    sequence = sbol3.Sequence('seq1')
    returned = document.add(sequence)
    # Document.add should return the object
    # See https://github.com/SynBioDex/pySBOL3/issues/272
    self.assertEqual(sequence, returned)
    found = document.find(sequence.identity)
    self.assertEqual(sequence.identity, found.identity)
def test_cascade_identity(self):
    """Updating the identity of an owned object cascades to its children."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    parent = sbol3.Component('c1', sbol3.SBO_DNA)
    sequence = sbol3.Sequence('seq1')
    location = sbol3.EntireSequence(sequence)
    feature = sbol3.SequenceFeature([location])
    parent.features.append(feature)
    self.assertIsNotNone(feature.identity)
    # Once the feature has an identity, it should cascade down to the
    # location that the feature owns
    self.assertIsNotNone(location.identity)
def test_invalid_encoding(self):
    """An SBOL 3.0-era encoding URI yields exactly one validation warning."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    name = 'seq1'
    candidate = sbol3.Sequence(name)
    self.assertIsNotNone(candidate)
    candidate.elements = 'actg'
    # This is an encoding from SBOL 3.0. It is no longer
    # valid as of 3.0.1/3.1.
    candidate.encoding = 'http://sbols.org/v3#iupacNucleicAcid'
    # Expect a single warning, for the encoding that is not in the
    # recommended set.
    outcome = candidate.validate()
    self.assertEqual(1, len(outcome))
    self.assertEqual(1, len(outcome.warnings))
def test_uri_assignment(self):
    """Appending a URI string to a reference list yields a resolvable reference."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    document = sbol3.Document()
    owner = sbol3.Component('c1', sbol3.SBO_DNA)
    target = sbol3.Sequence('seq1')
    document.add(owner)
    document.add(target)
    # Assign by URI string rather than by object
    owner.sequences.append(target.identity)
    reference = owner.sequences[0]
    self.assertEqual(target.identity, reference)
    resolved = reference.lookup()
    self.assertIsNotNone(resolved)
    self.assertEqual(target, resolved)
def test_singleton_assignment(self):
    """Assigning an instance to a single-valued reference yields a resolvable reference."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    document = sbol3.Document()
    owner = SingleRefObj('sro1')
    target = sbol3.Sequence('seq1')
    document.add(owner)
    document.add(target)
    # Assign the object itself via plain attribute assignment
    owner.sequence = target
    reference = owner.sequence
    self.assertEqual(target.identity, reference)
    resolved = reference.lookup()
    self.assertIsNotNone(resolved)
    self.assertEqual(target, resolved)
def test_read_default_namespace(self):
    """Serialized data can be read back under a different default namespace.

    This is a modified version of the initial bug report for
    https://github.com/SynBioDex/pySBOL3/issues/337
    """
    source_doc = sbol3.Document()
    sbol3.set_namespace('http://foo.org')
    source_doc.add(sbol3.Sequence('bar'))
    self.assertEqual(0, len(source_doc.validate()))
    fmt = sbol3.SORTED_NTRIPLES
    serialized = source_doc.write_string(file_format=fmt)

    # Read back while the namespace is unchanged
    same_ns_doc = sbol3.Document()
    same_ns_doc.read_string(serialized, file_format=fmt)

    # Read back again after switching the default namespace
    sbol3.set_namespace('http://baz.com/')
    other_ns_doc = sbol3.Document()
    other_ns_doc.read_string(serialized, file_format=fmt)
def test_validate(self):
    """Document-level validation reports issues found in nested objects.

    Validation should cover all the objects in the document and return a
    report containing all errors and warnings.
    """
    document = sbol3.Document()
    component = sbol3.Component('https://github.com/synbiodex/pysbol3/c1',
                                sbol3.SBO_DNA)
    document.add(component)
    sequence = sbol3.Sequence('https://github.com/synbiodex/pysbol3/s1')
    document.add(sequence)
    # A Range whose start (10) is greater than its end (1)
    backwards = sbol3.Range(sequence, 10, 1)
    feature = sbol3.SequenceFeature([backwards])
    component.features.append(feature)
    outcome = document.validate()
    # We should find the validation issue in the Range
    self.assertEqual(1, len(outcome))
def test_sequence_validators(self):
    """Exercise the unambiguous DNA/RNA/protein validators on strings and Sequences."""
    # Plain strings
    assert unambiguous_dna_sequence('actGATCG')
    assert not unambiguous_dna_sequence('this is a non-DNA string')
    assert unambiguous_rna_sequence('acugaucg')
    assert not unambiguous_rna_sequence('actgatcg')
    assert unambiguous_protein_sequence('tklqpntvir')
    assert not unambiguous_protein_sequence('tklqxpntvir')

    # Sequence objects: start with a DNA-encoded sequence
    seq_obj = sbol3.Sequence('http://test.org/seq',
                             namespace='http://test.org/',
                             encoding=sbol3.IUPAC_DNA_ENCODING,
                             elements='acgacg')
    assert unambiguous_dna_sequence(seq_obj)
    # Because DNA and RNA use the same EDAM identifier
    assert unambiguous_rna_sequence(seq_obj)
    assert not unambiguous_protein_sequence(seq_obj)

    # Protein letters under the DNA encoding are rejected by both validators
    seq_obj.elements = 'tklqpntvir'
    assert not unambiguous_rna_sequence(seq_obj)
    assert not unambiguous_protein_sequence(seq_obj)

    # Switching to the protein encoding makes the same elements acceptable
    seq_obj.encoding = sbol3.IUPAC_PROTEIN_ENCODING
    assert unambiguous_protein_sequence(seq_obj)
def test_change_object_namespace_ref(self):
    """Changing namespaces keeps a component's sequence reference in step."""
    # test with a referenced object, like a component with a sequence
    old_ns = 'https://github.com/synbiodex/pysbol3'
    sbol3.set_namespace(old_ns)
    document = sbol3.Document()
    sequence = sbol3.Sequence('s1')
    component = sbol3.Component('c1', types=[sbol3.SBO_DNA],
                                sequences=[sequence])
    document.add([sequence, component])
    self.assertEqual(2, len(document))
    self.assertEqual(sequence.identity, component.sequences[0])

    new_ns = 'https://example.com/test_ns'
    # Remember the identities so the change can be detected afterwards
    sequence_before = sequence.identity
    component_before = component.identity
    document.change_object_namespace([sequence, component], new_ns)

    self.assertEqual(new_ns, sequence.namespace)
    self.assertEqual(new_ns, component.namespace)
    self.assertTrue(sequence.identity.startswith(new_ns))
    self.assertTrue(component.identity.startswith(new_ns))
    self.assertNotEqual(sequence_before, sequence.identity)
    self.assertNotEqual(component_before, component.identity)
    # The reference must follow the sequence to its new identity
    self.assertEqual(sequence.identity, component.sequences[0])
def test_overwrite_identity(self):
    """Re-parenting an owned feature fails and leaves identities untouched."""
    sbol3.set_namespace('https://github.com/synbiodex/pysbol3')
    first_parent = sbol3.Component('c1', sbol3.SBO_DNA)
    sequence = sbol3.Sequence('seq1')
    location = sbol3.EntireSequence(sequence)
    feature = sbol3.SequenceFeature([location])
    first_parent.features.append(feature)
    self.assertIsNotNone(feature.identity)
    # identity should cascade down to the location after it
    # is set on the sequence feature
    self.assertIsNotNone(location.identity)
    feature_identity_before = feature.identity
    location_identity_before = location.identity

    second_parent = sbol3.Component('c2', sbol3.SBO_DNA)
    # Adding the same object to a different parent should cause an
    # error because the object is still parented by the first component.
    # See https://github.com/SynBioDex/pySBOL3/issues/178
    with self.assertRaises(ValueError):
        second_parent.features.append(feature)
    self.assertEqual(location_identity_before, location.identity)
    self.assertEqual(feature_identity_before, feature.identity)
def row_to_basic_part(doc: sbol3.Document, row, basic_parts: sbol3.Collection,
                      linear_products: sbol3.Collection,
                      final_products: sbol3.Collection, config: dict,
                      source_table: dict):
    """
    Read a row for a basic part and turn it into SBOL Component

    Rows whose name cell is empty are skipped. For other rows a Component
    is created and added to ``doc``; when the row carries a sequence, a
    Sequence is added too and linked from the Component.

    :param doc: Document to add parts to
    :param row: Excel row to be processed
    :param basic_parts: collection of parts to add to
    :param linear_products: collection of linear parts to add to
    :param final_products: collection of final parts to add to
    :param config: dictionary of sheet parsing configuration variables
    :param source_table: dictionary mapping source names to namespaces
    :return: None
    :raises ValueError: if the length cell does not match the length of the
        normalized sequence (a missing sequence counts as length 0)
    """
    # Parse material from sheet row
    name = row[config['basic_name_col']].value
    if name is None:
        return  # skip lines without names
    name = name.strip()  # make sure we're discarding whitespace
    raw_role = row[config['basic_role_col']].value
    try:  # look up with tyto; if fail, leave blank or add to description
        role = (tyto.SO.get_uri_by_term(raw_role) if raw_role else None)
    except LookupError:
        logging.warning(
            f'Role "{raw_role}" could not be found in Sequence Ontology')
        role = None
    design_notes = (row[config['basic_notes_col']].value
                    if row[config['basic_notes_col']].value else "")
    description = (row[config['basic_description_col']].value
                   if row[config['basic_description_col']].value else "")
    source_prefix = row[config['basic_source_prefix_col']].value
    source_id = row[config['basic_source_id_col']].value
    final_product = row[config['basic_final_col']].value  # boolean
    circular = row[config['basic_circular_col']].value  # boolean
    length = row[config['basic_length_col']].value
    raw_sequence = row[config['basic_sequence_col']].value
    # Normalize unicode, upper-case, and remove every whitespace character
    sequence = (None if raw_sequence is None else "".join(
        unicodedata.normalize("NFKD", raw_sequence).upper().split()))
    # A missing sequence is treated as length 0. Computing the length up
    # front avoids calling len(None), which previously surfaced as a
    # TypeError instead of the intended mismatch error.
    sequence_length = 0 if sequence is None else len(sequence)
    if sequence_length != length:
        raise ValueError(
            f'Part "{name}" has mismatched sequence length: check for bad characters and extra whitespace'
        )

    # identity comes from source if set to a literal table, from display_id if not set
    identity = None
    display_id = None
    was_derived_from = None
    namespace = sbol3.get_namespace()
    if source_id and source_prefix:
        source_prefix = source_prefix.strip()
        if source_prefix in source_table:
            if source_table[source_prefix]:
                display_id = sbol3.string_to_display_id(source_id.strip())
                identity = f'{source_table[source_prefix]}/{display_id}'
                namespace = source_table[source_prefix]
            else:  # when there is no prefix, use the bare value (in SBOL3 format)
                raw_url = source_id.strip()
                identity = url_to_identity(
                    strip_filetype_suffix(strip_sbol2_version(raw_url)))
                was_derived_from = raw_url
                namespace = identity.rsplit(
                    '/', 1)[0]  # TODO: use a helper function
        else:
            logging.info(
                f'Part "{name}" ignoring non-literal source: {source_prefix}')
    elif source_id:
        logging.warning(
            f'Part "{name}" has source ID specified but not prefix: {source_id}'
        )
    elif source_prefix:
        logging.warning(
            f'Part "{name}" has source prefix specified but not ID: {source_prefix}'
        )
    if not identity:
        display_id = sbol3.string_to_display_id(name)

    # build a component from the material
    logging.debug(f'Creating basic part "{name}"')
    component = sbol3.Component(
        identity or display_id,
        sbol3.SBO_DNA,
        name=name,
        namespace=namespace,
        description=f'{design_notes}\n{description}'.strip())
    if was_derived_from:
        component.derived_from.append(was_derived_from)
    doc.add(component)
    if role:
        component.roles.append(role)
    if circular:
        component.types.append(sbol3.SO_CIRCULAR)
    if sequence:
        sbol_seq = sbol3.Sequence(f'{component.identity}_sequence',
                                  namespace=namespace,
                                  encoding=sbol3.IUPAC_DNA_ENCODING,
                                  elements=sequence)
        doc.add(sbol_seq)
        component.sequences.append(sbol_seq.identity)

    # add the component to the appropriate collections
    basic_parts.members.append(component.identity)
    if final_product:
        linear_products.members.append(component.identity)
        final_products.members.append(component.identity)