Example #1
0
    def from_string(cls, string):
        """Build a UGroup out of its tab-separated textual form.

        The leading 'U' record tag is optional: when the first
        tab-separated token is exactly 'U' it is discarded before the
        remaining tokens are interpreted.

        The first two tokens are validated against the 'uid' and 'ids'
        entries of cls.REQUIRED_FIELDS; every further token is parsed
        as an optional field.

        Raises line.InvalidLineError when the string contains only
        whitespace or when fewer tokens than required are present.
        Errors raised by the validators are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'U':
            tokens = tokens[1:]

        required = cls.REQUIRED_FIELDS
        if len(tokens) < len(required):
            raise line.InvalidLineError("The minimum number of field for "
                                        "UGroup line is not reached.")

        ugroup = UGroup()

        # Required fields, in the positional order they occupy on the line.
        parsed = []
        for position, key in enumerate(('uid', 'ids')):
            checked = fv.validate(tokens[position], required[key])
            parsed.append(line.Field(key, checked))

        # Whatever follows the required fields is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in tokens[2:])

        for entry in parsed:
            ugroup.add_field(entry)
        return ugroup
Example #2
0
    def from_string(cls, string):
        """Build a SegmentV1 out of its tab-separated textual form.

        The leading 'S' record tag is optional: when the first
        tab-separated token is exactly 'S' it is discarded before the
        remaining tokens are interpreted.

        The first two tokens are validated against the 'name' and
        'sequence' entries of cls.REQUIRED_FIELDS; every further token
        is parsed as an optional field.

        Raises line.InvalidLineError when the string contains only
        whitespace or when fewer tokens than required are present.
        Errors raised by the validators are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'S':
            tokens = tokens[1:]

        required = cls.REQUIRED_FIELDS
        if len(tokens) < len(required):
            raise line.InvalidLineError("The minimum number of field for "
                                        "SegmentV1 line is not reached.")

        segment = SegmentV1()

        # Required fields, in the positional order they occupy on the line.
        parsed = []
        for position, key in enumerate(('name', 'sequence')):
            checked = fv.validate(tokens[position], required[key])
            parsed.append(line.Field(key, checked))

        # Whatever follows the required fields is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in tokens[2:])

        for entry in parsed:
            segment.add_field(entry)
        return segment
Example #3
0
    def __init__(self, name, value, field_type):
        """Create an optional field after checking its name and type.

        The name must consist of exactly two alphanumeric ASCII
        characters and the type must be one of the single letters
        A, B, H, J, Z, i or f; otherwise a ValueError is raised.
        The value is then passed through fv.validate using the given
        type, and any error raised there propagates to the caller.
        """
        name_match = re.fullmatch('[A-Za-z0-9]' * 2, name)
        if name_match is None:
            raise ValueError("Invalid optfield name, given '{0}'".format(name))

        type_match = re.fullmatch("^[ABHJZif]$", field_type)
        if type_match is None:
            raise ValueError("Invalid type for an optional field.")

        # Name and type are stored before the value is validated.
        self._name = name
        self._type = field_type
        validated = fv.validate(value, field_type)
        self._value = validated
Example #4
0
    def from_string(cls, string):
        """Build an Edge out of its tab-separated textual form.

        The leading 'E' record tag is optional: when the first
        tab-separated token is exactly 'E' it is discarded before the
        remaining tokens are interpreted.

        The first eight tokens are validated, in order, against the
        'eid', 'sid1', 'sid2', 'beg1', 'end1', 'beg2', 'end2' and
        'alignment' entries of cls.REQUIRED_FIELDS; every further
        token is parsed as an optional field.

        Raises line.InvalidLineError when the string contains only
        whitespace or when fewer tokens than required are present.
        Errors raised by the validators are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'E':
            tokens = tokens[1:]

        required = cls.REQUIRED_FIELDS
        if len(tokens) < len(required):
            raise line.InvalidLineError("The minimum number of field for "
                                        "Edge line is not reached.")

        edge = Edge()

        # Required fields, in the positional order they occupy on the line.
        ordered_keys = ('eid', 'sid1', 'sid2', 'beg1',
                        'end1', 'beg2', 'end2', 'alignment')
        parsed = []
        for position, key in enumerate(ordered_keys):
            checked = fv.validate(tokens[position], required[key])
            parsed.append(line.Field(key, checked))

        # Whatever follows the required fields is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in tokens[8:])

        for entry in parsed:
            edge.add_field(entry)
        return edge
Example #5
0
    def from_string(cls, string):
        """Build a Gap out of its tab-separated textual form.

        The leading 'G' record tag is optional: when the first
        tab-separated token is exactly 'G' it is discarded before the
        remaining tokens are interpreted.

        The first five tokens are validated, in order, against the
        'gid', 'sid1', 'sid2', 'distance' and 'variance' entries of
        cls.REQUIRED_FIELDS; every further token is parsed as an
        optional field.

        Raises line.InvalidLineError when the string contains only
        whitespace or when fewer tokens than required are present.
        Errors raised by the validators are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'G':
            tokens = tokens[1:]

        required = cls.REQUIRED_FIELDS
        if len(tokens) < len(required):
            raise line.InvalidLineError("The minimum number of field for "
                                        "Gap line is not reached.")

        gap = Gap()

        # Required fields, in the positional order they occupy on the line.
        ordered_keys = ('gid', 'sid1', 'sid2', 'distance', 'variance')
        parsed = []
        for position, key in enumerate(ordered_keys):
            checked = fv.validate(tokens[position], required[key])
            parsed.append(line.Field(key, checked))

        # Whatever follows the required fields is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in tokens[5:])

        for entry in parsed:
            gap.add_field(entry)
        return gap
Example #6
0
    def from_string(cls, string):
        """Build a Path out of its tab-separated textual form.

        The leading 'P' record tag is optional: when the first
        tab-separated token is exactly 'P' it is discarded before the
        remaining tokens are interpreted.

        The first token is validated as 'path_name'. The second token
        is split on commas and each piece is validated as
        'seqs_names', producing a list. The third token is validated
        as 'overlaps'. Every further token is parsed as an optional
        field.

        Raises line.InvalidLineError when the string contains only
        whitespace or when fewer tokens than required are present.
        Errors raised by the validators are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'P':
            tokens = tokens[1:]

        required = cls.REQUIRED_FIELDS
        if len(tokens) < len(required):
            raise line.InvalidLineError("The minimum number of field for "
                                        "Path line is not reached.")

        path = Path()

        # All three required values are validated before any Field
        # wrapper is created.
        name_value = fv.validate(tokens[0], required['path_name'])

        segment_labels = []
        for label in tokens[1].split(","):
            segment_labels.append(
                fv.validate(label, required['seqs_names']))

        overlaps_value = fv.validate(tokens[2], required['overlaps'])

        parsed = [
            line.Field('path_name', name_value),
            line.Field('seqs_names', segment_labels),
            line.Field('overlaps', overlaps_value),
        ]

        # Whatever follows the required fields is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in tokens[3:])

        for entry in parsed:
            path.add_field(entry)
        return path
Example #7
0
    def from_string(cls, string):
        """Build a Link out of its tab-separated textual form.

        The leading 'L' record tag is optional: when the first
        tab-separated token is exactly 'L' it is discarded before the
        remaining tokens are interpreted.

        The first five tokens are validated, in order, against the
        'from', 'from_orn', 'to', 'to_orn' and 'overlap' entries of
        cls.REQUIRED_FIELDS; every further token is parsed as an
        optional field.

        Raises line.InvalidLineError when the string contains only
        whitespace or when fewer tokens than required are present.
        Errors raised by the validators are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'L':
            tokens = tokens[1:]

        required = cls.REQUIRED_FIELDS
        if len(tokens) < len(required):
            raise line.InvalidLineError("The minimum number of field for "
                                        "Link line is not reached.")

        link = Link()

        # Every required value is validated first; the Field wrappers
        # are only created once all validations have succeeded.
        ordered_keys = ('from', 'from_orn', 'to', 'to_orn', 'overlap')
        checked_values = [fv.validate(tokens[position], required[key])
                          for position, key in enumerate(ordered_keys)]
        parsed = [line.Field(key, checked)
                  for key, checked in zip(ordered_keys, checked_values)]

        # Whatever follows the required fields is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in tokens[5:])

        for entry in parsed:
            link.add_field(entry)
        return link
Example #8
0
    def from_string(cls, string):
        """Build a Fragment out of its tab-separated textual form.

        The leading 'F' record tag is optional: when the first
        tab-separated token is exactly 'F' it is discarded before the
        remaining tokens are interpreted.

        The first seven tokens are validated, in order, against the
        'sid', 'external', 'sbeg', 'send', 'fbeg', 'fend' and
        'alignment' entries of cls.REQUIRED_FIELDS; every further
        token is parsed as an optional field.

        Raises line.InvalidLineError when the string contains only
        whitespace or when fewer tokens than required are present.
        Errors raised by the validators are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'F':
            tokens = tokens[1:]

        required = cls.REQUIRED_FIELDS
        if len(tokens) < len(required):
            raise line.InvalidLineError("The minimum number of field for "
                                        "Fragment line is not reached.")

        fragment = Fragment()

        # Required fields, in the positional order they occupy on the line.
        ordered_keys = ('sid', 'external', 'sbeg', 'send',
                        'fbeg', 'fend', 'alignment')
        parsed = []
        for position, key in enumerate(ordered_keys):
            checked = fv.validate(tokens[position], required[key])
            parsed.append(line.Field(key, checked))

        # Whatever follows the required fields is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in tokens[7:])

        for entry in parsed:
            fragment.add_field(entry)
        return fragment
Example #9
0
    def from_string(cls, string):
        """Build a Containment out of its tab-separated textual form.

        The leading 'C' record tag is optional: when the first
        tab-separated token is exactly 'C' it is discarded before the
        remaining tokens are interpreted.

        The first six tokens are validated, in order, against the
        'from', 'from_orn', 'to', 'to_orn', 'pos' and 'overlap'
        entries of cls.REQUIRED_FIELDS; every further token is parsed
        as an optional field.

        Raises line.InvalidLineError when the string contains only
        whitespace or when fewer tokens than required are present.
        Errors raised by the validators are not caught here.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'C':
            # Drop the record tag so that indexes start at the first field.
            tokens = tokens[1:]

        required = cls.REQUIRED_FIELDS
        if len(tokens) < len(required):
            raise line.InvalidLineError("The minimum number of field for "
                                        "Containment line is not reached.")

        containment = Containment()

        # Every required value is validated first; the Field wrappers
        # are only created once all validations have succeeded.
        ordered_keys = ('from', 'from_orn', 'to', 'to_orn', 'pos', 'overlap')
        checked_values = [fv.validate(tokens[position], required[key])
                          for position, key in enumerate(ordered_keys)]
        parsed = [line.Field(key, checked)
                  for key, checked in zip(ordered_keys, checked_values)]

        # Whatever follows the required fields is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in tokens[6:])

        for entry in parsed:
            containment.add_field(entry)
        return containment
Example #10
0
    def test_field_type(self):
        """Exercise field validation for every supported data type.

        Each section builds TestField objects with a given value and
        type. Constructions outside an assertRaises block are expected
        to succeed; constructions inside one are expected to raise the
        named exception. Where the validated value is converted (for
        example to an int or to a list), the conversion is asserted
        as well.

        TODO:
            Check for json parser/verifier existence within python
            (should exist).
        """
        # An unknown type identifier is rejected with a dedicated error.
        with self.assertRaises(fv.UnknownDataTypeError):
            optf = TestField('bb', 'c')  # c is an invalid type of field

        # --- optional field type 'A' ---
        optf = TestField('A', 'A')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('aa', 'A')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', 'A')

        # --- optional field type 'i': the value is expected as an int ---
        optf = TestField('-42', 'i')
        self.assertTrue(optf.value == -42)
        optf = TestField('+42', 'i')
        optf = TestField('42', 'i')
        self.assertTrue(optf.value == +42)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('aa', 'i')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', 'i')

        # --- optional field type 'f' ---
        optf = TestField('-1.4241e-11', 'f')
        optf = TestField('+1.4241E+11', 'f')
        optf = TestField('42', 'f')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('A', 'f')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('042e0.5', 'f')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', 'f')

        # --- optional field type 'Z' ---
        optf = TestField('The gray fox jumped from somewhere.', 'Z')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('力 - is the force', 'Z')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('\n - is the force', 'Z')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', 'Z')

        # --- optional field type 'J' ---
        # NOTE(review): the original author marked the next construction
        # as one that "should fail" -- presumably because the value is
        # not valid JSON (see the TODO in the docstring); confirm against
        # the 'J' validator.
        optf = TestField('The gray fox jumped from somewhere.', 'J')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('力 - is the force', 'J')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('\n - is the force', 'J')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', 'J')

        # --- optional field type 'H' ---
        optf = TestField('A5F', 'H')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('a5f', 'H')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('g', 'H')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', 'H')

        # --- optional field type 'B' ---
        optf = TestField('c,15,17,21,-32', 'B')
        optf = TestField('f,15,.05e4', 'B')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('f15,i.05e4', 'B')
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', 'B')

        # --- fv.GFA1_NAME ---
        optf = TestField('(12', fv.GFA1_NAME)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA1_NAME)

        # --- fv.GFA1_ORIENTATION ---
        optf = TestField('+', fv.GFA1_ORIENTATION)
        optf = TestField('-', fv.GFA1_ORIENTATION)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA1_ORIENTATION)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('++', fv.GFA1_ORIENTATION)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('a', fv.GFA1_ORIENTATION)

        # --- fv.GFA1_NAMES ---
        optf = TestField('(12-,14+,17-', fv.GFA1_NAMES)

        # no sign orientation near 14 (expected to be accepted)
        optf = TestField('(12-,14,17-', fv.GFA1_NAMES)

        # space is not allowed, even for separating elements in the array
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('(12-, 14+,17-', fv.GFA1_NAMES)

        # the empty string is rejected as well
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA1_NAMES)

        # --- fv.GFA1_SEQUENCE ---
        optf = TestField('acgt', fv.GFA1_SEQUENCE)
        optf = TestField('*', fv.GFA1_SEQUENCE)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('*acgt', fv.GFA1_SEQUENCE)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA1_SEQUENCE)

        # --- fv.GFA1_INT: negative values are rejected ---
        optf = TestField('0', fv.GFA1_INT)
        optf = TestField('100', fv.GFA1_INT)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('-1', fv.GFA1_INT)

        # --- fv.GFA1_CIGAR ---
        optf = TestField('*', fv.GFA1_CIGAR)
        optf = TestField('5I2M', fv.GFA1_CIGAR)
        self.assertTrue(fv.is_gfa1_cigar(optf.value))
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA1_CIGAR)

        # --- fv.GFA1_CIGARS ---
        optf = TestField('*,*,*', fv.GFA1_CIGARS)
        optf = TestField('*', fv.GFA1_CIGARS)
        optf = TestField('5I2M,*,3X,22M', fv.GFA1_CIGARS)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA1_CIGARS)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('5I2M,*,3,22M', fv.GFA1_CIGARS)

        # --- fv.GFA2_CIGAR: unlike GFA1_CIGAR, '*' is rejected here ---
        optf = TestField('5I2M', fv.GFA2_CIGAR)
        self.assertTrue(fv.is_gfa2_cigar(optf.value))
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_CIGAR)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('*', fv.GFA2_CIGAR)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('5I3X', fv.GFA2_CIGAR)

        # --- fv.GFA2_ID ---
        optf = TestField('aa', fv.GFA2_ID)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_ID)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('a a', fv.GFA2_ID)

        # --- fv.GFA2_IDS: the value is expected as a list of ids ---
        optf = TestField('aa', fv.GFA2_IDS)
        self.assertTrue(optf.value == ['aa'])
        optf = TestField('aa bb cc dd', fv.GFA2_IDS)
        self.assertTrue(optf.value == ['aa', 'bb', 'cc', 'dd'])
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_IDS)
        # there are 2 spaces between the a and the b
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('a  b', fv.GFA2_IDS)

        # --- fv.GFA2_REFERENCE: a trailing orientation sign is required ---
        optf = TestField('aa+', fv.GFA2_REFERENCE)
        optf = TestField('aa-', fv.GFA2_REFERENCE)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_REFERENCE)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('aa', fv.GFA2_REFERENCE)

        # --- fv.GFA2_REFERENCES: the value is expected as a list ---
        optf = TestField('aa+', fv.GFA2_REFERENCES)
        self.assertTrue(optf.value == ['aa+'])
        optf = TestField('aa+ bb- cc+ dd-', fv.GFA2_REFERENCES)
        self.assertTrue(optf.value == ['aa+', 'bb-', 'cc+', 'dd-'])
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_REFERENCES)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('aa bb+', fv.GFA2_REFERENCES)

        # --- fv.GFA2_OPTIONAL_INT: an int, or the '*' placeholder kept as-is ---
        optf = TestField('42', fv.GFA2_OPTIONAL_INT)
        self.assertTrue(optf.value == 42)
        optf = TestField('*', fv.GFA2_OPTIONAL_INT)
        self.assertTrue(optf.value == "*")

        # --- fv.GFA2_INT: negative values are rejected ---
        optf = TestField('42', fv.GFA2_INT)
        self.assertTrue(optf.value == 42)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_INT)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('-42', fv.GFA2_INT)

        # --- fv.GFA2_TRACE ---
        optf = TestField('42', fv.GFA2_TRACE)
        optf = TestField('42,42', fv.GFA2_TRACE)
        optf = TestField('42,42,42', fv.GFA2_TRACE)
        # The already-validated value is validated a second time and the
        # result is checked with fv.is_dazzler_trace.
        dazz_trace = fv.validate(optf.value, fv.GFA2_TRACE)
        self.assertTrue(fv.is_dazzler_trace(dazz_trace))

        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_TRACE)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('-42', fv.GFA2_TRACE)

        # --- fv.GFA2_POSITION ---
        optf = TestField('42', fv.GFA2_POSITION)
        # fv.GFA2_POSITION will be validated and converted
        # to a string
        self.assertTrue(optf.value == "42")
        optf = TestField('42$', fv.GFA2_POSITION)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_POSITION)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('$', fv.GFA2_POSITION)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('1$$', fv.GFA2_POSITION)

        # --- fv.GFA2_SEQUENCE ---
        optf = TestField('*', fv.GFA2_SEQUENCE)
        optf = TestField('acgtACGTXYZ', fv.GFA2_SEQUENCE)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_SEQUENCE)

        # --- fv.GFA2_OPTIONAL_ID ---
        optf = TestField('aa', fv.GFA2_OPTIONAL_ID)
        optf = TestField('*', fv.GFA2_OPTIONAL_ID)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_OPTIONAL_ID)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('* ', fv.GFA2_OPTIONAL_ID)

        # --- fv.GFA2_ALIGNMENT: accepts a trace, '*' or a cigar ---
        optf = TestField('42,42,42', fv.GFA2_ALIGNMENT)
        optf = TestField('*', fv.GFA2_ALIGNMENT)
        optf = TestField('2I3M', fv.GFA2_ALIGNMENT)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('', fv.GFA2_ALIGNMENT)
        with self.assertRaises(fv.InvalidFieldError):
            optf = TestField('42,13M', fv.GFA2_ALIGNMENT)
Example #11
0
 def __init__(self, value, field_type):
     """Store the field type, then the value returned by fv.validate.

     The type is recorded before validation runs; any error raised by
     fv.validate propagates to the caller.
     """
     self.type = field_type
     checked_value = fv.validate(value, field_type)
     self.value = checked_value