def from_string(cls, string):
    """Build a UGroup from its tab-separated textual representation.

    The leading 'U' record marker is optional: when the first
    tab-separated token is exactly 'U' it is discarded, otherwise the
    first token is taken as the first UGroup field.

    The required fields 'uid' and 'ids' are validated, in that order,
    against the types declared in cls.REQUIRED_FIELDS. Every remaining
    token is parsed as an optional field.

    Raises line.InvalidLineError when the string is empty or made only
    of whitespace, or when fewer tokens than required fields are given.
    Any exception raised while validating a token is propagated.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = string.split('\t')
    if tokens[0] == 'U':
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for UGroup line is not reached.")

    ugroup = UGroup()

    # Each required token is validated and wrapped before the next one
    # is looked at.
    required = ('uid', 'ids')
    parsed = []
    for position, key in enumerate(required):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[key])
        parsed.append(line.Field(key, checked))

    # Whatever follows the required tokens is an optional field.
    for extra in tokens[len(required):]:
        parsed.append(line.OptField.from_string(extra))

    for item in parsed:
        ugroup.add_field(item)
    return ugroup
def from_string(cls, string):
    """Build a SegmentV1 from its tab-separated textual representation.

    The leading 'S' record marker is optional: when the first
    tab-separated token is exactly 'S' it is discarded, otherwise the
    first token is taken as the first segment field.

    The required fields 'name' and 'sequence' are validated, in that
    order, against the types declared in cls.REQUIRED_FIELDS. Every
    remaining token is parsed as an optional field.

    Raises line.InvalidLineError when the string is empty or made only
    of whitespace, or when fewer tokens than required fields are given.
    Any exception raised while validating a token is propagated.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = string.split('\t')
    if tokens[0] == 'S':
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for SegmentV1 line is not reached.")

    segment = SegmentV1()

    # Each required token is validated and wrapped before the next one
    # is looked at.
    required = ('name', 'sequence')
    parsed = []
    for position, key in enumerate(required):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[key])
        parsed.append(line.Field(key, checked))

    # Whatever follows the required tokens is an optional field.
    for extra in tokens[len(required):]:
        parsed.append(line.OptField.from_string(extra))

    for item in parsed:
        segment.add_field(item)
    return segment
def __init__(self, name, value, field_type):
    """Initialise an optional field from its name, raw value and type.

    name must be exactly two characters, each an ASCII letter or digit.
    field_type must be exactly one of the characters A, B, H, J, Z, i
    or f. The value is stored in the form returned by fv.validate for
    the given type.

    Raises ValueError when the name or the type is not well formed.
    Any exception raised by fv.validate is propagated.
    """
    name_match = re.fullmatch('[A-Za-z0-9][A-Za-z0-9]', name)
    if name_match is None:
        raise ValueError("Invalid optfield name, given '{0}'".format(name))

    type_match = re.fullmatch("^[ABHJZif]$", field_type)
    if type_match is None:
        raise ValueError("Invalid type for an optional field.")

    self._name = name
    self._type = field_type
    validated = fv.validate(value, field_type)
    self._value = validated
def from_string(cls, string):
    """Build an Edge from its tab-separated textual representation.

    The leading 'E' record marker is optional: when the first
    tab-separated token is exactly 'E' it is discarded, otherwise the
    first token is taken as the first Edge field.

    The required fields 'eid', 'sid1', 'sid2', 'beg1', 'end1', 'beg2',
    'end2' and 'alignment' are validated, in that order, against the
    types declared in cls.REQUIRED_FIELDS. Every remaining token is
    parsed as an optional field.

    Raises line.InvalidLineError when the string is empty or made only
    of whitespace, or when fewer tokens than required fields are given.
    Any exception raised while validating a token is propagated.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = string.split('\t')
    if tokens[0] == 'E':
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for Edge line is not reached.")

    edge = Edge()

    # Each required token is validated and wrapped before the next one
    # is looked at.
    required = ('eid', 'sid1', 'sid2', 'beg1', 'end1', 'beg2', 'end2',
                'alignment')
    parsed = []
    for position, key in enumerate(required):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[key])
        parsed.append(line.Field(key, checked))

    # Whatever follows the required tokens is an optional field.
    for extra in tokens[len(required):]:
        parsed.append(line.OptField.from_string(extra))

    for item in parsed:
        edge.add_field(item)
    return edge
def from_string(cls, string):
    """Build a Gap from its tab-separated textual representation.

    The leading 'G' record marker is optional: when the first
    tab-separated token is exactly 'G' it is discarded, otherwise the
    first token is taken as the first Gap field.

    The required fields 'gid', 'sid1', 'sid2', 'distance' and
    'variance' are validated, in that order, against the types declared
    in cls.REQUIRED_FIELDS. Every remaining token is parsed as an
    optional field.

    Raises line.InvalidLineError when the string is empty or made only
    of whitespace, or when fewer tokens than required fields are given.
    Any exception raised while validating a token is propagated.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = string.split('\t')
    if tokens[0] == 'G':
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for Gap line is not reached.")

    gap = Gap()

    # Each required token is validated and wrapped before the next one
    # is looked at.
    required = ('gid', 'sid1', 'sid2', 'distance', 'variance')
    parsed = []
    for position, key in enumerate(required):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[key])
        parsed.append(line.Field(key, checked))

    # Whatever follows the required tokens is an optional field.
    for extra in tokens[len(required):]:
        parsed.append(line.OptField.from_string(extra))

    for item in parsed:
        gap.add_field(item)
    return gap
def from_string(cls, string):
    """Build a Path from its tab-separated textual representation.

    The leading 'P' record marker is optional: when the first
    tab-separated token is exactly 'P' it is discarded, otherwise the
    first token is taken as the first path field.

    Three required tokens are read: the path name, a comma-separated
    list of labels (each label is validated on its own against the
    'seqs_names' type and the results are kept as a list), and the
    overlaps. All three are validated before any Field is created.
    Every remaining token is parsed as an optional field.

    Raises line.InvalidLineError when the string is empty or made only
    of whitespace, or when fewer tokens than required fields are given.
    Any exception raised while validating a token is propagated.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = string.split('\t')
    if tokens[0] == 'P':
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for Path line is not reached.")

    path = Path()

    # Validation of the three required tokens comes first ...
    name_value = fv.validate(tokens[0], cls.REQUIRED_FIELDS['path_name'])
    label_values = []
    for label in tokens[1].split(","):
        label_values.append(
            fv.validate(label, cls.REQUIRED_FIELDS['seqs_names']))
    overlaps_value = fv.validate(tokens[2], cls.REQUIRED_FIELDS['overlaps'])

    # ... then the fields are built in their declaration order.
    parsed = [
        line.Field('path_name', name_value),
        line.Field('seqs_names', label_values),
        line.Field('overlaps', overlaps_value),
    ]

    # Whatever follows the required tokens is an optional field.
    for extra in tokens[3:]:
        parsed.append(line.OptField.from_string(extra))

    for item in parsed:
        path.add_field(item)
    return path
def from_string(cls, string):
    """Build a Link from its tab-separated textual representation.

    The leading 'L' record marker is optional: when the first
    tab-separated token is exactly 'L' it is discarded, otherwise the
    first token is taken as the first link field.

    The required fields 'from', 'from_orn', 'to', 'to_orn' and
    'overlap' are all validated, in that order, against the types
    declared in cls.REQUIRED_FIELDS before any Field is created. Every
    remaining token is parsed as an optional field.

    Raises line.InvalidLineError when the string is empty or made only
    of whitespace, or when fewer tokens than required fields are given.
    Any exception raised while validating a token is propagated.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = string.split('\t')
    if tokens[0] == 'L':
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for Link line is not reached.")

    link = Link()

    required = ('from', 'from_orn', 'to', 'to_orn', 'overlap')

    # Validate every required token first ...
    checked = [
        fv.validate(tokens[position], cls.REQUIRED_FIELDS[key])
        for position, key in enumerate(required)
    ]
    # ... and only then wrap the validated values into fields.
    parsed = [line.Field(key, value) for key, value in zip(required, checked)]

    # Whatever follows the required tokens is an optional field.
    for extra in tokens[len(required):]:
        parsed.append(line.OptField.from_string(extra))

    for item in parsed:
        link.add_field(item)
    return link
def from_string(cls, string):
    """Build a Fragment from its tab-separated textual representation.

    The leading 'F' record marker is optional: when the first
    tab-separated token is exactly 'F' it is discarded, otherwise the
    first token is taken as the first fragment field.

    The required fields 'sid', 'external', 'sbeg', 'send', 'fbeg',
    'fend' and 'alignment' are validated, in that order, against the
    types declared in cls.REQUIRED_FIELDS. Every remaining token is
    parsed as an optional field.

    Raises line.InvalidLineError when the string is empty or made only
    of whitespace, or when fewer tokens than required fields are given.
    Any exception raised while validating a token is propagated.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = string.split('\t')
    if tokens[0] == 'F':
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for Fragment line is not reached.")

    fragment = Fragment()

    # Each required token is validated and wrapped before the next one
    # is looked at.
    required = ('sid', 'external', 'sbeg', 'send', 'fbeg', 'fend',
                'alignment')
    parsed = []
    for position, key in enumerate(required):
        checked = fv.validate(tokens[position], cls.REQUIRED_FIELDS[key])
        parsed.append(line.Field(key, checked))

    # Whatever follows the required tokens is an optional field.
    for extra in tokens[len(required):]:
        parsed.append(line.OptField.from_string(extra))

    for item in parsed:
        fragment.add_field(item)
    return fragment
def from_string(cls, string):
    """Build a Containment from its tab-separated textual representation.

    The leading 'C' record marker is optional: when the first
    tab-separated token is exactly 'C' it is discarded, otherwise the
    first token is taken as the first containment field.

    The required fields 'from', 'from_orn', 'to', 'to_orn', 'pos' and
    'overlap' are all validated, in that order, against the types
    declared in cls.REQUIRED_FIELDS before any Field is created. Every
    remaining token is parsed as an optional field.

    Raises line.InvalidLineError when the string is empty or made only
    of whitespace, or when fewer tokens than required fields are given.
    Any exception raised while validating a token is propagated.
    """
    if not string.split():
        raise line.InvalidLineError("Cannot parse the empty string.")

    tokens = string.split('\t')
    if tokens[0] == 'C':
        # Drop the record marker so indexes line up with the fields.
        tokens = tokens[1:]

    if len(tokens) < len(cls.REQUIRED_FIELDS):
        raise line.InvalidLineError(
            "The minimum number of field for Containment line is not reached.")

    containment = Containment()

    required = ('from', 'from_orn', 'to', 'to_orn', 'pos', 'overlap')

    # Validate every required token first ...
    checked = [
        fv.validate(tokens[position], cls.REQUIRED_FIELDS[key])
        for position, key in enumerate(required)
    ]
    # ... and only then wrap the validated values into fields.
    parsed = [line.Field(key, value) for key, value in zip(required, checked)]

    # Whatever follows the required tokens is an optional field.
    for extra in tokens[len(required):]:
        parsed.append(line.OptField.from_string(extra))

    for item in parsed:
        containment.add_field(item)
    return containment
def test_field_type(self):
    """Check how TestField handles each supported field data type.

    For every data type a few well-formed values are constructed (the
    construction itself must not raise) and a few malformed values are
    expected to raise fv.InvalidFieldError. An unknown data type is
    expected to raise fv.UnknownDataTypeError. Where the validated
    value is inspected, it is compared with assertEqual so a failure
    reports both operands.

    TODO: Check for json parser/verifier existence within python
    (should exists).
    """
    # 'c' is not a known field type.
    with self.assertRaises(fv.UnknownDataTypeError):
        TestField('bb', 'c')

    # --- 'A' type
    optf = TestField('A', 'A')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('aa', 'A')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', 'A')

    # --- 'i' type: the validated value is compared with an int
    optf = TestField('-42', 'i')
    self.assertEqual(optf.value, -42)
    optf = TestField('+42', 'i')
    optf = TestField('42', 'i')
    self.assertEqual(optf.value, +42)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('aa', 'i')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', 'i')

    # --- 'f' type
    optf = TestField('-1.4241e-11', 'f')
    optf = TestField('+1.4241E+11', 'f')
    optf = TestField('42', 'f')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('A', 'f')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('042e0.5', 'f')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', 'f')

    # --- 'Z' type
    optf = TestField('The gray fox jumped from somewhere.', 'Z')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('力 - is the force', 'Z')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('\n - is the force', 'Z')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', 'Z')

    # --- 'J' type
    # NOTE(review): the original comment here reads "this test should
    # fail"; presumably it flags that a plain sentence is accepted as
    # a 'J' value (see the TODO in the docstring) -- confirm.
    optf = TestField('The gray fox jumped from somewhere.', 'J')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('力 - is the force', 'J')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('\n - is the force', 'J')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', 'J')

    # --- 'H' type: lowercase digits are rejected
    optf = TestField('A5F', 'H')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('a5f', 'H')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('g', 'H')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', 'H')

    # --- 'B' type
    optf = TestField('c,15,17,21,-32', 'B')
    optf = TestField('f,15,.05e4', 'B')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('f15,i.05e4', 'B')
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', 'B')

    # --- GFA1 name
    optf = TestField('(12', fv.GFA1_NAME)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA1_NAME)

    # --- GFA1 orientation
    optf = TestField('+', fv.GFA1_ORIENTATION)
    optf = TestField('-', fv.GFA1_ORIENTATION)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA1_ORIENTATION)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('++', fv.GFA1_ORIENTATION)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('a', fv.GFA1_ORIENTATION)

    # --- GFA1 names
    optf = TestField('(12-,14+,17-', fv.GFA1_NAMES)
    # no sign orientation near 14
    optf = TestField('(12-,14,17-', fv.GFA1_NAMES)
    # space is not allowed, even for separating elements in the array
    with self.assertRaises(fv.InvalidFieldError):
        TestField('(12-, 14+,17-', fv.GFA1_NAMES)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA1_NAMES)

    # --- GFA1 sequence
    optf = TestField('acgt', fv.GFA1_SEQUENCE)
    optf = TestField('*', fv.GFA1_SEQUENCE)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('*acgt', fv.GFA1_SEQUENCE)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA1_SEQUENCE)

    # --- GFA1 int
    optf = TestField('0', fv.GFA1_INT)
    optf = TestField('100', fv.GFA1_INT)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('-1', fv.GFA1_INT)

    # --- GFA1 cigar
    optf = TestField('*', fv.GFA1_CIGAR)
    optf = TestField('5I2M', fv.GFA1_CIGAR)
    self.assertTrue(fv.is_gfa1_cigar(optf.value))
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA1_CIGAR)

    # --- GFA1 cigars
    optf = TestField('*,*,*', fv.GFA1_CIGARS)
    optf = TestField('*', fv.GFA1_CIGARS)
    optf = TestField('5I2M,*,3X,22M', fv.GFA1_CIGARS)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA1_CIGARS)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('5I2M,*,3,22M', fv.GFA1_CIGARS)

    # --- GFA2 cigar
    optf = TestField('5I2M', fv.GFA2_CIGAR)
    self.assertTrue(fv.is_gfa2_cigar(optf.value))
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_CIGAR)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('*', fv.GFA2_CIGAR)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('5I3X', fv.GFA2_CIGAR)

    # --- GFA2 id
    optf = TestField('aa', fv.GFA2_ID)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_ID)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('a a', fv.GFA2_ID)

    # --- GFA2 ids: the validated value is a list of ids
    optf = TestField('aa', fv.GFA2_IDS)
    self.assertEqual(optf.value, ['aa'])
    optf = TestField('aa bb cc dd', fv.GFA2_IDS)
    self.assertEqual(optf.value, ['aa', 'bb', 'cc', 'dd'])
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_IDS)
    # There are 2 spaces between the a and the b. A single space is a
    # legal separator (see 'aa bb cc dd' above), so the doubled space
    # is what makes this value invalid.
    with self.assertRaises(fv.InvalidFieldError):
        TestField('a  b', fv.GFA2_IDS)

    # --- GFA2 reference
    optf = TestField('aa+', fv.GFA2_REFERENCE)
    optf = TestField('aa-', fv.GFA2_REFERENCE)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_REFERENCE)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('aa', fv.GFA2_REFERENCE)

    # --- GFA2 references: the validated value is a list of references
    optf = TestField('aa+', fv.GFA2_REFERENCES)
    self.assertEqual(optf.value, ['aa+'])
    optf = TestField('aa+ bb- cc+ dd-', fv.GFA2_REFERENCES)
    self.assertEqual(optf.value, ['aa+', 'bb-', 'cc+', 'dd-'])
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_REFERENCES)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('aa bb+', fv.GFA2_REFERENCES)

    # --- GFA2 optional int: either an int or the '*' placeholder
    optf = TestField('42', fv.GFA2_OPTIONAL_INT)
    self.assertEqual(optf.value, 42)
    optf = TestField('*', fv.GFA2_OPTIONAL_INT)
    self.assertEqual(optf.value, "*")

    # --- GFA2 int
    optf = TestField('42', fv.GFA2_INT)
    self.assertEqual(optf.value, 42)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_INT)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('-42', fv.GFA2_INT)

    # --- GFA2 trace
    optf = TestField('42', fv.GFA2_TRACE)
    optf = TestField('42,42', fv.GFA2_TRACE)
    optf = TestField('42,42,42', fv.GFA2_TRACE)
    # The already validated value is validated once more and the
    # result must be recognised as a dazzler trace.
    dazz_trace = fv.validate(optf.value, fv.GFA2_TRACE)
    self.assertTrue(fv.is_dazzler_trace(dazz_trace))
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_TRACE)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('-42', fv.GFA2_TRACE)

    # --- GFA2 position
    optf = TestField('42', fv.GFA2_POSITION)
    # fv.GFA2_POSITION will be validated and converted to a string
    self.assertEqual(optf.value, "42")
    optf = TestField('42$', fv.GFA2_POSITION)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_POSITION)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('$', fv.GFA2_POSITION)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('1$$', fv.GFA2_POSITION)

    # --- GFA2 sequence
    optf = TestField('*', fv.GFA2_SEQUENCE)
    optf = TestField('acgtACGTXYZ', fv.GFA2_SEQUENCE)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_SEQUENCE)

    # --- GFA2 optional id
    optf = TestField('aa', fv.GFA2_OPTIONAL_ID)
    optf = TestField('*', fv.GFA2_OPTIONAL_ID)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_OPTIONAL_ID)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('* ', fv.GFA2_OPTIONAL_ID)

    # --- GFA2 alignment: a trace, the '*' placeholder or a cigar
    optf = TestField('42,42,42', fv.GFA2_ALIGNMENT)
    optf = TestField('*', fv.GFA2_ALIGNMENT)
    optf = TestField('2I3M', fv.GFA2_ALIGNMENT)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('', fv.GFA2_ALIGNMENT)
    with self.assertRaises(fv.InvalidFieldError):
        TestField('42,13M', fv.GFA2_ALIGNMENT)
def __init__(self, value, field_type):
    """Record the field type and the validated form of the value.

    The type is stored as given; the value is stored in the form
    returned by fv.validate for that type. Any exception raised by
    fv.validate is propagated, in which case only `type` has been set.
    """
    self.type = field_type
    validated = fv.validate(value, field_type)
    self.value = validated