def from_string(cls, string):
    """Build a UGroup line from its tab-separated text form.

    A leading ``U`` token is optional: when it is the first
    tab-separated token it is discarded before the fields are read.
    The required fields (``uid`` and ``ids``) are read by position and
    passed through ``fv.validate`` together with the matching
    ``cls.REQUIRED_FIELDS`` entry; every remaining token is parsed with
    ``line.OptField.from_string``.

    :param cls: object exposing the ``REQUIRED_FIELDS`` mapping.
    :param string: text of the line to parse.
    :returns: a new ``UGroup`` with all the parsed fields added to it.
    :raises line.InvalidLineError: if ``string`` is empty or contains
        only whitespace, or if it holds fewer tokens than
        ``cls.REQUIRED_FIELDS`` has entries.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = re.split('\t', string)
    if tokens[0] == 'U':
        # Drop the record-type marker, keep only the payload.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for "
            "UGroup line is not reached.")

    ugroup = UGroup()
    required_names = ('uid', 'ids')

    parsed = []
    for position, name in enumerate(required_names):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[name])
        parsed.append(line.Field(name, checked))
    for token in tokens[len(required_names):]:
        parsed.append(line.OptField.from_string(token))

    # Fields are attached only once the whole line has been parsed.
    for item in parsed:
        ugroup.add_field(item)
    return ugroup
def from_string(cls, string):
    """Build a SegmentV1 line from its tab-separated text form.

    A leading ``S`` token is optional: when it is the first
    tab-separated token it is discarded before the fields are read.
    The required fields (``name`` and ``sequence``) are read by
    position and passed through ``fv.validate`` together with the
    matching ``cls.REQUIRED_FIELDS`` entry; every remaining token is
    parsed with ``line.OptField.from_string``.

    :param cls: object exposing the ``REQUIRED_FIELDS`` mapping.
    :param string: text of the line to parse.
    :returns: a new ``SegmentV1`` with all the parsed fields added.
    :raises line.InvalidLineError: if ``string`` is empty or contains
        only whitespace, or if it holds fewer tokens than
        ``cls.REQUIRED_FIELDS`` has entries.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = re.split('\t', string)
    if tokens[0] == 'S':
        # Drop the record-type marker, keep only the payload.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for "
            "SegmentV1 line is not reached.")

    segment = SegmentV1()
    required_names = ('name', 'sequence')

    parsed = []
    for position, name in enumerate(required_names):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[name])
        parsed.append(line.Field(name, checked))
    for token in tokens[len(required_names):]:
        parsed.append(line.OptField.from_string(token))

    # Fields are attached only once the whole line has been parsed.
    for item in parsed:
        segment.add_field(item)
    return segment
def test_Field(self):
    """Compare a ``line.Field`` with a ``BadField`` look-alike.

    The two objects, built from the same name and value, are expected
    to compare equal; once the ``name`` attribute is deleted from the
    look-alike the comparison is expected to fail.
    """
    reference = line.Field('name', '25')
    lookalike = BadField('name', '25')
    self.assertEqual(reference, lookalike)

    del lookalike.name
    self.assertFalse(reference == lookalike)
def test_invalid_line(self):
    """Check that an incomplete GFA1 Segment line is reported invalid.

    The segment receives only a name and one optional field; the
    required ``sequence`` field is never added, so
    ``SegmentV1.is_valid`` is expected to return a false value.
    """
    incomplete = segment.SegmentV1()

    name_field = line.Field('name', '3')
    incomplete.add_field(name_field)

    optional_field = line.OptField('AC', '3', 'i')
    incomplete.add_field(optional_field)

    verdict = segment.SegmentV1.is_valid(incomplete)
    self.assertFalse(verdict)
def from_string(cls, string):
    """Build an Edge line from its tab-separated text form.

    A leading ``E`` token is optional: when it is the first
    tab-separated token it is discarded before the fields are read.
    The eight required fields (eid, sid1, sid2, beg1, end1, beg2, end2,
    alignment) are read by position and passed through ``fv.validate``
    together with the matching ``cls.REQUIRED_FIELDS`` entry; every
    remaining token is parsed with ``line.OptField.from_string``.

    :param cls: object exposing the ``REQUIRED_FIELDS`` mapping.
    :param string: text of the line to parse.
    :returns: a new ``Edge`` with all the parsed fields added to it.
    :raises line.InvalidLineError: if ``string`` is empty or contains
        only whitespace, or if it holds fewer tokens than
        ``cls.REQUIRED_FIELDS`` has entries.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = re.split('\t', string)
    if tokens[0] == 'E':
        # Drop the record-type marker, keep only the payload.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for "
            "Edge line is not reached.")

    edge = Edge()
    required_names = (
        'eid',
        'sid1',
        'sid2',
        'beg1',
        'end1',
        'beg2',
        'end2',
        'alignment',
    )

    parsed = []
    for position, name in enumerate(required_names):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[name])
        parsed.append(line.Field(name, checked))
    for token in tokens[len(required_names):]:
        parsed.append(line.OptField.from_string(token))

    # Fields are attached only once the whole line has been parsed.
    for item in parsed:
        edge.add_field(item)
    return edge
def from_string(cls, string):
    """Build a Gap line from its tab-separated text form.

    A leading ``G`` token is optional: when it is the first
    tab-separated token it is discarded before the fields are read.
    The five required fields (gid, sid1, sid2, distance, variance) are
    read by position and passed through ``fv.validate`` together with
    the matching ``cls.REQUIRED_FIELDS`` entry; every remaining token
    is parsed with ``line.OptField.from_string``.

    :param cls: object exposing the ``REQUIRED_FIELDS`` mapping.
    :param string: text of the line to parse.
    :returns: a new ``Gap`` with all the parsed fields added to it.
    :raises line.InvalidLineError: if ``string`` is empty or contains
        only whitespace, or if it holds fewer tokens than
        ``cls.REQUIRED_FIELDS`` has entries.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = re.split('\t', string)
    if tokens[0] == 'G':
        # Drop the record-type marker, keep only the payload.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for "
            "Gap line is not reached.")

    gap = Gap()
    required_names = ('gid', 'sid1', 'sid2', 'distance', 'variance')

    parsed = []
    for position, name in enumerate(required_names):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[name])
        parsed.append(line.Field(name, checked))
    for token in tokens[len(required_names):]:
        parsed.append(line.OptField.from_string(token))

    # Fields are attached only once the whole line has been parsed.
    for item in parsed:
        gap.add_field(item)
    return gap
def from_string(cls, string):
    """Build a Path line from its tab-separated text form.

    A leading ``P`` token is optional: when it is the first
    tab-separated token it is discarded before the fields are read.
    The second payload token is split on commas and each label is
    validated on its own, so the ``seqs_names`` field holds a list.
    Every token after the three required ones is parsed with
    ``line.OptField.from_string``.

    :param cls: object exposing the ``REQUIRED_FIELDS`` mapping.
    :param string: text of the line to parse.
    :returns: a new ``Path`` with all the parsed fields added to it.
    :raises line.InvalidLineError: if ``string`` is empty or contains
        only whitespace, or if it holds fewer tokens than
        ``cls.REQUIRED_FIELDS`` has entries.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = re.split('\t', string)
    if tokens[0] == 'P':
        # Drop the record-type marker, keep only the payload.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for "
            "Path line is not reached.")

    path = Path()

    # All required values are validated before any Field is created.
    checked_name = fv.validate(tokens[0], cls.REQUIRED_FIELDS['path_name'])
    checked_labels = []
    for label in tokens[1].split(","):
        checked_labels.append(
            fv.validate(label, cls.REQUIRED_FIELDS['seqs_names']))
    checked_overlaps = fv.validate(tokens[2], cls.REQUIRED_FIELDS['overlaps'])

    parsed = [
        line.Field('path_name', checked_name),
        line.Field('seqs_names', checked_labels),
        line.Field('overlaps', checked_overlaps),
    ]
    for token in tokens[3:]:
        parsed.append(line.OptField.from_string(token))

    for item in parsed:
        path.add_field(item)
    return path
def from_string(cls, string):
    """Build a Link line from its tab-separated text form.

    A leading ``L`` token is optional: when it is the first
    tab-separated token it is discarded before the fields are read.
    The five required fields (from, from_orn, to, to_orn, overlap) are
    read by position and passed through ``fv.validate`` together with
    the matching ``cls.REQUIRED_FIELDS`` entry; every remaining token
    is parsed with ``line.OptField.from_string``.

    :param cls: object exposing the ``REQUIRED_FIELDS`` mapping.
    :param string: text of the line to parse.
    :returns: a new ``Link`` with all the parsed fields added to it.
    :raises line.InvalidLineError: if ``string`` is empty or contains
        only whitespace, or if it holds fewer tokens than
        ``cls.REQUIRED_FIELDS`` has entries.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = re.split('\t', string)
    if tokens[0] == 'L':
        # Drop the record-type marker, keep only the payload.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for "
            "Link line is not reached.")

    link = Link()
    required_names = ('from', 'from_orn', 'to', 'to_orn', 'overlap')

    # All required values are validated before any Field is created.
    checked_values = []
    for position, name in enumerate(required_names):
        checked_values.append(
            fv.validate(tokens[position], cls.REQUIRED_FIELDS[name]))

    parsed = []
    for position, name in enumerate(required_names):
        parsed.append(line.Field(name, checked_values[position]))
    for token in tokens[len(required_names):]:
        parsed.append(line.OptField.from_string(token))

    for item in parsed:
        link.add_field(item)
    return link
def from_string(cls, string):
    """Build a Fragment line from its tab-separated text form.

    A leading ``F`` token is optional: when it is the first
    tab-separated token it is discarded before the fields are read.
    The seven required fields (sid, external, sbeg, send, fbeg, fend,
    alignment) are read by position and passed through ``fv.validate``
    together with the matching ``cls.REQUIRED_FIELDS`` entry; every
    remaining token is parsed with ``line.OptField.from_string``.

    :param cls: object exposing the ``REQUIRED_FIELDS`` mapping.
    :param string: text of the line to parse.
    :returns: a new ``Fragment`` with all the parsed fields added.
    :raises line.InvalidLineError: if ``string`` is empty or contains
        only whitespace, or if it holds fewer tokens than
        ``cls.REQUIRED_FIELDS`` has entries.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = re.split('\t', string)
    if tokens[0] == 'F':
        # Drop the record-type marker, keep only the payload.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for "
            "Fragment line is not reached.")

    fragment = Fragment()
    required_names = (
        'sid',
        'external',
        'sbeg',
        'send',
        'fbeg',
        'fend',
        'alignment',
    )

    parsed = []
    for position, name in enumerate(required_names):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[name])
        parsed.append(line.Field(name, checked))
    for token in tokens[len(required_names):]:
        parsed.append(line.OptField.from_string(token))

    # Fields are attached only once the whole line has been parsed.
    for item in parsed:
        fragment.add_field(item)
    return fragment
def from_string(cls, string):
    """Build a Containment line from its tab-separated text form.

    A leading ``C`` token is optional: when it is the first
    tab-separated token it is discarded before the fields are read.
    The six required fields (from, from_orn, to, to_orn, pos, overlap)
    are read by position and passed through ``fv.validate`` together
    with the matching ``cls.REQUIRED_FIELDS`` entry; every remaining
    token is parsed with ``line.OptField.from_string``.

    :param cls: object exposing the ``REQUIRED_FIELDS`` mapping.
    :param string: text of the line to parse.
    :returns: a new ``Containment`` with all the parsed fields added.
    :raises line.InvalidLineError: if ``string`` is empty or contains
        only whitespace, or if it holds fewer tokens than
        ``cls.REQUIRED_FIELDS`` has entries.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = re.split('\t', string)
    if tokens[0] == 'C':
        # Drop the record-type marker (the C), keep only the payload.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for "
            "Containment line is not reached.")

    containment = Containment()
    required_names = ('from', 'from_orn', 'to', 'to_orn', 'pos', 'overlap')

    # All required values are validated before any Field is created.
    checked_values = []
    for position, name in enumerate(required_names):
        checked_values.append(
            fv.validate(tokens[position], cls.REQUIRED_FIELDS[name]))

    parsed = []
    for position, name in enumerate(required_names):
        parsed.append(line.Field(name, checked_values[position]))
    for token in tokens[len(required_names):]:
        parsed.append(line.OptField.from_string(token))

    for item in parsed:
        containment.add_field(item)
    return containment
def test_line(self):
    """Exercise add_field, remove_field, equality and is_valid on lines.

    The scenario is sequential: each step relies on the state left on
    the segment by the previous ones.
    """
    subject = segment.SegmentV1()

    # A required field is stored under its name with its raw value.
    subject.add_field(line.Field('name', '3'))
    self.assertEqual(subject.fields['name'].value, '3')

    # An optional field of type 'i' is expected to hold the number 3.
    subject.add_field(line.OptField('AA', '3', 'i'))
    self.assertEqual(subject.fields['AA'].value, 3)

    # Anything that is not a field object is rejected.
    with self.assertRaises(fv.InvalidFieldError):
        subject.add_field(3)

    # The same field cannot be added twice.
    with self.assertRaises(ValueError):
        subject.add_field(line.OptField('AA', '3', 'i'))

    # An optional field whose value does not fit its type is rejected.
    with self.assertRaises(fv.InvalidFieldError):
        subject.add_field(line.OptField('AA', 'a', 'i'))

    # Given a Field, remove_field drops the entry with that Field's
    # name, even though the value ('4') differs from the stored one.
    snapshot = copy.deepcopy(subject)
    subject.remove_field(line.Field('name', '4'))
    self.assertNotIn('name', subject.fields)
    self.assertNotEqual(subject, snapshot)

    # Given a string, remove_field drops the entry stored under it.
    subject.add_field(line.Field('name', '3'))
    subject.remove_field('name')
    self.assertNotIn('name', subject.fields)

    # Removing a field that is not there leaves the line untouched.
    snapshot = copy.deepcopy(subject)
    subject.remove_field('non_existent_field')
    self.assertEqual(snapshot, subject)

    # A line never compares equal to a bare copy of its fields.
    fields_snapshot = copy.deepcopy(subject.fields)
    self.assertNotEqual(subject, fields_snapshot)

    static_fields = segment.SegmentV1.get_static_fields()
    expected_names = (
        'name', 'sequence', 'LN', 'RC', 'FC', 'KC', 'SH', 'UR',
    )
    for expected in expected_names:
        self.assertIn(expected, static_fields)

    # A segment lacking the sequence field is not valid.
    candidate = segment.SegmentV1()
    candidate.add_field(line.Field("name", "3"))
    self.assertFalse(segment.SegmentV1.is_valid(candidate))

    # Guard against duck typing, i.e. the case where the user tries to
    # replicate the line class (maybe to extend it).
    candidate = line.Line()
    del candidate._type
    self.assertFalse(segment.SegmentV1.is_valid(candidate))

    candidate = segment.SegmentV1()
    candidate._type = None
    candidate.add_field(line.Field("name", "3"))
    candidate.add_field(line.Field("sequence", "acgt"))
    self.assertFalse(segment.SegmentV1.is_valid(candidate))

    with self.assertRaises(fv.InvalidFieldError):
        candidate.add_field(line.Field("AA", "3"))
def test_node(self):
    """Exercise Node construction, equality, is_node and from_line.

    The scenario is sequential: the look-alike node is mutated step by
    step and compared against the node built from a valid segment.
    """
    node.Node("15", "acgt", 4)
    with self.assertRaises(node.InvalidNodeError):
        node.Node("*", "3", "aCGT")

    valid_segment = segment.SegmentV1()
    valid_segment.add_field(line.Field('name', "3"))
    valid_segment.add_field(
        line.Field('sequence', "TGCAACGTATAGACTTGTCAC"))
    valid_segment.add_field(line.OptField('AA', "test", "Z"))
    valid_segment.add_field(line.OptField('AB', "test2", "Z"))
    self.assertTrue(segment.SegmentV1.is_valid(valid_segment))

    reference = node.Node.from_line(valid_segment)
    self.assertTrue(node.is_node(reference))

    lookalike = BadNode(
        reference.nid,
        reference.sequence,
        reference.slen,
        reference.opt_fields)
    self.assertTrue(node.is_node(lookalike))
    self.assertEqual(lookalike, reference)

    # When an optional field of one operand is missing from the other,
    # the two nodes are not equal.
    del lookalike.opt_fields["AA"]
    self.assertFalse(lookalike == reference)

    lookalike.nid = "5"
    self.assertFalse(lookalike == reference)

    # Without a nid the object no longer qualifies as a node.
    del lookalike.nid
    self.assertFalse(node.is_node(lookalike))
    self.assertNotEqual(lookalike, reference)

    # NOTE: a check on str(reference) used to live here, but it was
    # disabled in the original test.

    # The sequence field is deliberately left out, so the segment
    # does not have all of its required fields.
    incomplete_segment = segment.SegmentV1()
    incomplete_segment.add_field(line.Field("name", "3"))
    self.assertFalse(segment.SegmentV1.is_valid(incomplete_segment))

    # The name Field is stored under a wrong key ("name_").
    broken_line = segment.SegmentV1()
    broken_line._fields["name_"] = line.Field("name", "3")
    broken_line._fields["sequence"] = line.Field(
        "sequence", "TGCAACGTATAGACTTGTCAC")
    with self.assertRaises(line.InvalidLineError):
        node.Node.from_line(broken_line)

    # An entry that is not a field must not leak into opt_fields.
    tampered = segment.SegmentV2.from_string(
        "S\t3\t21\tTGCAACGTATAGACTTGTCAC\tRC:i:4")
    tampered.fields['wrong_field'] = 42
    self.assertIn('wrong_field', tampered.fields)
    reference = node.Node.from_line(tampered)
    self.assertNotIn('wrong_field', reference.opt_fields)

    with self.assertRaises(node.InvalidNodeError):
        node.Node("3", 3, "acgt")
    with self.assertRaises(node.InvalidNodeError):
        node.Node("3", "3", "acgt acgt")