예제 #1
0
    def from_string(cls, string):
        """Build a UGroup line from its tab-separated representation.

        A leading ``U`` column is accepted and skipped; the remaining
        columns are read as ``uid``, ``ids`` and then any number of
        optional fields.

        Raises line.InvalidLineError when the string holds nothing but
        whitespace, or when it has fewer columns than
        ``cls.REQUIRED_FIELDS`` has entries.  Anything raised by
        ``fv.validate`` or ``line.OptField.from_string`` propagates
        unchanged.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        columns = re.split('\t', string)
        if columns[0] == 'U':
            # Drop the record-type marker.
            columns = columns[1:]

        if len(columns) < len(cls.REQUIRED_FIELDS):
            raise line.InvalidLineError(
                "The minimum number of field for "
                "UGroup line is not reached.")

        result = UGroup()

        # Required columns, validated in positional order.
        parsed = []
        for position, name in enumerate(('uid', 'ids')):
            value = fv.validate(columns[position], cls.REQUIRED_FIELDS[name])
            parsed.append(line.Field(name, value))

        # Every column after the required ones is an optional field.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in columns[2:])

        for item in parsed:
            result.add_field(item)
        return result
예제 #2
0
    def from_string(cls, string):
        """Build a SegmentV1 line from its tab-separated representation.

        The text may start with the ``S`` column or go straight to the
        segment columns: ``name``, ``sequence`` and then any number of
        optional fields.

        Raises line.InvalidLineError when the string holds nothing but
        whitespace, or when it has fewer columns than
        ``cls.REQUIRED_FIELDS`` has entries.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'S':
            tokens = tokens[1:]

        if len(tokens) < len(cls.REQUIRED_FIELDS):
            message = ("The minimum number of field for "
                       "SegmentV1 line is not reached.")
            raise line.InvalidLineError(message)

        new_segment = SegmentV1()
        collected = []

        # Required columns first, each validated right before it is wrapped.
        required_names = ('name', 'sequence')
        for index, key in enumerate(required_names):
            checked = fv.validate(tokens[index], cls.REQUIRED_FIELDS[key])
            collected.append(line.Field(key, checked))

        # Then the optional fields, all parsed before any field is added.
        for raw_optional in tokens[len(required_names):]:
            collected.append(line.OptField.from_string(raw_optional))

        for entry in collected:
            new_segment.add_field(entry)
        return new_segment
예제 #3
0
    def test_Field(self):
        """Check equality between a line.Field and a BadField.

        Both objects are built from the same name and value and are
        expected to compare equal; after the ``name`` attribute is
        deleted from the BadField the comparison must be false.
        """
        reference = line.Field('name', '25')
        other = BadField('name', '25')
        self.assertTrue(reference == other)

        # Without its name attribute the other object must stop matching.
        del other.name
        self.assertFalse(reference == other)
예제 #4
0
 def test_invalid_line(self):
     """A GFA1 segment lacking its sequence must not validate.

     Only the name and one optional field are added to the segment;
     the required ``sequence`` field is never set, so
     ``SegmentV1.is_valid`` is expected to report a false value.
     """
     incomplete = segment.SegmentV1()

     name_field = line.Field('name', '3')
     incomplete.add_field(name_field)

     optional_field = line.OptField('AC', '3', 'i')
     incomplete.add_field(optional_field)

     verdict = segment.SegmentV1.is_valid(incomplete)
     self.assertFalse(verdict)
예제 #5
0
파일: edge.py 프로젝트: Francesco2304/pygfa
    def from_string(cls, string):
        """Build an Edge line from its tab-separated representation.

        A leading ``E`` column is optional.  The columns that follow are
        read, in order, as ``eid``, ``sid1``, ``sid2``, ``beg1``,
        ``end1``, ``beg2``, ``end2`` and ``alignment``; anything after
        them is parsed as an optional field.

        Raises line.InvalidLineError when the string holds nothing but
        whitespace, or when it has fewer columns than
        ``cls.REQUIRED_FIELDS`` has entries.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        columns = re.split('\t', string)
        if columns[0] == 'E':
            columns = columns[1:]

        if len(columns) < len(cls.REQUIRED_FIELDS):
            raise line.InvalidLineError(
                "The minimum number of field for "
                "Edge line is not reached.")

        result = Edge()

        # Positional layout of the required columns.
        layout = ('eid', 'sid1', 'sid2',
                  'beg1', 'end1',
                  'beg2', 'end2',
                  'alignment')

        parsed = []
        for position, name in enumerate(layout):
            value = fv.validate(columns[position], cls.REQUIRED_FIELDS[name])
            parsed.append(line.Field(name, value))

        # Trailing columns are optional fields.
        for extra in columns[len(layout):]:
            parsed.append(line.OptField.from_string(extra))

        for item in parsed:
            result.add_field(item)

        return result
예제 #6
0
    def from_string(cls, string):
        """Build a Gap line from its tab-separated representation.

        The text may begin with the ``G`` column or directly with the
        gap columns: ``gid``, ``sid1``, ``sid2``, ``distance`` and
        ``variance``, followed by any number of optional fields.

        Raises line.InvalidLineError when the string holds nothing but
        whitespace, or when it has fewer columns than
        ``cls.REQUIRED_FIELDS`` has entries.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        pieces = re.split('\t', string)
        if pieces[0] == 'G':
            pieces = pieces[1:]

        if len(pieces) < len(cls.REQUIRED_FIELDS):
            message = ("The minimum number of field for "
                       "Gap line is not reached.")
            raise line.InvalidLineError(message)

        new_gap = Gap()

        required_names = ('gid', 'sid1', 'sid2', 'distance', 'variance')
        collected = []
        for index, key in enumerate(required_names):
            # Validate each column immediately before wrapping it.
            checked = fv.validate(pieces[index], cls.REQUIRED_FIELDS[key])
            collected.append(line.Field(key, checked))

        collected.extend(line.OptField.from_string(optional)
                         for optional in pieces[5:])

        for entry in collected:
            new_gap.add_field(entry)

        return new_gap
예제 #7
0
    def from_string(cls, string):
        """Build a Path line from its tab-separated representation.

        A leading ``P`` column is optional.  The first remaining column
        is the path name, the second a comma-separated list whose items
        are validated one by one as ``seqs_names``, and the third the
        overlaps; later columns are parsed as optional fields.

        Raises line.InvalidLineError when the string holds nothing but
        whitespace, or when it has fewer columns than
        ``cls.REQUIRED_FIELDS`` has entries.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        columns = re.split('\t', string)
        if columns[0] == 'P':
            columns = columns[1:]

        if len(columns) < len(cls.REQUIRED_FIELDS):
            raise line.InvalidLineError(
                "The minimum number of field for "
                "Path line is not reached.")

        result = Path()

        # All three required columns are validated before any Field is built.
        name_value = fv.validate(columns[0], cls.REQUIRED_FIELDS['path_name'])

        segment_values = []
        for label in columns[1].split(","):
            segment_values.append(
                fv.validate(label, cls.REQUIRED_FIELDS['seqs_names']))

        overlaps_value = fv.validate(columns[2],
                                     cls.REQUIRED_FIELDS['overlaps'])

        parsed = [
            line.Field('path_name', name_value),
            line.Field('seqs_names', segment_values),
            line.Field('overlaps', overlaps_value),
        ]

        # Remaining columns are optional fields.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in columns[3:])

        for item in parsed:
            result.add_field(item)

        return result
예제 #8
0
    def from_string(cls, string):
        """Build a Link line from its tab-separated representation.

        The text may begin with the ``L`` column or directly with the
        link columns: ``from``, ``from_orn``, ``to``, ``to_orn`` and
        ``overlap``, followed by any number of optional fields.

        Raises line.InvalidLineError when the string holds nothing but
        whitespace, or when it has fewer columns than
        ``cls.REQUIRED_FIELDS`` has entries.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'L':
            tokens = tokens[1:]

        if len(tokens) < len(cls.REQUIRED_FIELDS):
            message = ("The minimum number of field for "
                       "Link line is not reached.")
            raise line.InvalidLineError(message)

        new_link = Link()

        required_names = ('from', 'from_orn', 'to', 'to_orn', 'overlap')

        # Validate every required column before wrapping any of them.
        checked_values = []
        for index, key in enumerate(required_names):
            checked_values.append(
                fv.validate(tokens[index], cls.REQUIRED_FIELDS[key]))

        collected = []
        for key, checked in zip(required_names, checked_values):
            collected.append(line.Field(key, checked))

        # Remaining columns are optional fields.
        for raw_optional in tokens[5:]:
            collected.append(line.OptField.from_string(raw_optional))

        for entry in collected:
            new_link.add_field(entry)

        return new_link
예제 #9
0
    def from_string(cls, string):
        """Build a Fragment line from its tab-separated representation.

        A leading ``F`` column is optional.  The columns that follow are
        read, in order, as ``sid``, ``external``, ``sbeg``, ``send``,
        ``fbeg``, ``fend`` and ``alignment``; anything after them is
        parsed as an optional field.

        Raises line.InvalidLineError when the string holds nothing but
        whitespace, or when it has fewer columns than
        ``cls.REQUIRED_FIELDS`` has entries.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        columns = re.split('\t', string)
        if columns[0] == 'F':
            columns = columns[1:]

        if len(columns) < len(cls.REQUIRED_FIELDS):
            raise line.InvalidLineError(
                "The minimum number of field for "
                "Fragment line is not reached.")

        result = Fragment()

        # Positional layout of the required columns.
        layout = ('sid', 'external',
                  'sbeg', 'send',
                  'fbeg', 'fend',
                  'alignment')

        parsed = []
        for position, name in enumerate(layout):
            value = fv.validate(columns[position], cls.REQUIRED_FIELDS[name])
            parsed.append(line.Field(name, value))

        # Trailing columns are optional fields.
        parsed.extend(line.OptField.from_string(extra)
                      for extra in columns[7:])

        for item in parsed:
            result.add_field(item)

        return result
예제 #10
0
    def from_string(cls, string):
        """Build a Containment line from its tab-separated representation.

        The text may begin with the ``C`` column or directly with the
        containment columns: ``from``, ``from_orn``, ``to``, ``to_orn``,
        ``pos`` and ``overlap``, followed by any number of optional
        fields.

        Raises line.InvalidLineError when the string holds nothing but
        whitespace, or when it has fewer columns than
        ``cls.REQUIRED_FIELDS`` has entries.
        """
        if not string.split():
            raise line.InvalidLineError("Cannot parse the empty string.")

        tokens = re.split('\t', string)
        if tokens[0] == 'C':
            # The first column is only the record-type marker.
            tokens = tokens[1:]

        if len(tokens) < len(cls.REQUIRED_FIELDS):
            message = ("The minimum number of field for "
                       "Containment line is not reached.")
            raise line.InvalidLineError(message)

        new_containment = Containment()

        required_names = ('from', 'from_orn', 'to', 'to_orn',
                          'pos', 'overlap')

        # Validate every required column before wrapping any of them.
        checked_values = []
        for index, key in enumerate(required_names):
            checked_values.append(
                fv.validate(tokens[index], cls.REQUIRED_FIELDS[key]))

        collected = []
        for key, checked in zip(required_names, checked_values):
            collected.append(line.Field(key, checked))

        # Remaining columns are optional fields.
        for raw_optional in tokens[6:]:
            collected.append(line.OptField.from_string(raw_optional))

        for entry in collected:
            new_containment.add_field(entry)

        return new_containment
예제 #11
0
    def test_line(self):
        """Exercise add_field, remove_field, equality and is_valid on lines.

        Using a SegmentV1 (and one bare line.Line), the test checks in
        sequence: adding required and optional fields, rejection of
        non-field objects, duplicates and ill-typed optional values,
        removal by Field object and by name, comparison with copies,
        the names returned by get_static_fields, and several
        constructions that is_valid must reject.
        """
        seg = segment.SegmentV1()
        # A required field keeps the string value it was given.
        seg.add_field(line.Field('name', '3'))
        self.assertTrue(seg.fields['name'].value == '3')
        # An optional field of type 'i' is expected to expose an int value.
        seg.add_field(line.OptField('AA', '3', 'i'))
        self.assertTrue(seg.fields['AA'].value == 3)

        # Adding something that is not a field object must be rejected.
        with self.assertRaises(fv.InvalidFieldError):
            seg.add_field(3)

        # Adding a field whose name is already present must be rejected.
        with self.assertRaises(ValueError):
            seg.add_field(line.OptField('AA', '3', 'i'))

        # An optional field whose value does not fit its declared type
        # ('a' for type 'i') must be rejected.
        with self.assertRaises(fv.InvalidFieldError):
            seg.add_field(line.OptField('AA', 'a', 'i'))

        # When a Field is passed, removal goes by the Field's name: the
        # stored 'name' entry disappears even though the Field passed
        # here carries a different value ('4' instead of '3').
        seg_copy = copy.deepcopy(seg)
        seg.remove_field(line.Field('name', '4'))
        self.assertTrue('name' not in seg.fields)
        self.assertTrue(seg != seg_copy)

        seg.add_field(line.Field('name', '3'))
        # When a string is passed, the entry stored under that key
        # is removed.
        seg.remove_field('name')
        self.assertTrue('name' not in seg.fields)

        # Removing a name that is not present leaves the line unchanged.
        seg_copy = copy.deepcopy(seg)
        seg.remove_field('non_existent_field')
        self.assertTrue(seg_copy == seg)

        # A line must not compare equal to a copy of its own fields mapping.
        fields_copy = copy.deepcopy(seg.fields)
        self.assertTrue(seg != fields_copy)

        # The static fields of SegmentV1 must include these names.
        segment_fields = segment.SegmentV1.get_static_fields()
        for field in ('name', 'sequence', 'LN', \
                      'RC', 'FC', 'KC', 'SH', 'UR'):
            self.assertTrue(field in segment_fields)

        # A segment carrying only its name must not be valid.
        invalid_segment = segment.SegmentV1()
        invalid_segment.add_field(line.Field("name", "3"))
        self.assertFalse(segment.SegmentV1.is_valid(invalid_segment))

        # Test against duck typing, for the case where the user is
        # trying to replicate the line class (maybe to extend it):
        # a Line without its _type attribute must not be valid.
        invalid_segment = line.Line()
        del (invalid_segment._type)
        self.assertFalse(segment.SegmentV1.is_valid(invalid_segment))

        # A segment holding both name and sequence but with _type set
        # to None must not be valid either.
        invalid_segment = segment.SegmentV1()
        invalid_segment._type = None
        invalid_segment.add_field(line.Field("name", "3"))
        invalid_segment.add_field(line.Field("sequence", "acgt"))
        self.assertFalse(segment.SegmentV1.is_valid(invalid_segment))

        # On that same line, adding a plain Field named "AA" must be
        # rejected.
        with self.assertRaises(fv.InvalidFieldError):
            invalid_segment.add_field(line.Field("AA", "3"))
예제 #12
0
    def test_node(self):
        """Exercise Node construction, from_line, is_node and equality.

        The test builds nodes directly and from segment lines, compares
        a node with a BadNode built from the same attributes, and
        checks which constructions and conversions must raise
        InvalidNodeError or InvalidLineError.
        """
        nod = node.Node("15", "acgt", 4)
        # This argument combination must be rejected by the constructor.
        with self.assertRaises(node.InvalidNodeError):
            nod = node.Node("*", "3", "aCGT")

        # A segment with name, sequence and two optional fields is valid.
        correct_segment = segment.SegmentV1()
        correct_segment.add_field(line.Field('name', "3"))
        correct_segment.add_field(
            line.Field('sequence', "TGCAACGTATAGACTTGTCAC"))
        correct_segment.add_field(line.OptField('AA', "test", "Z"))
        correct_segment.add_field(line.OptField('AB', "test2", "Z"))
        self.assertTrue(segment.SegmentV1.is_valid(correct_segment))

        node_ = node.Node.from_line(correct_segment)

        self.assertTrue(node.is_node(node_))

        # A BadNode built from the same attributes must pass is_node and
        # compare equal to the real node.
        bad_node = BadNode(node_.nid, node_.sequence, node_.slen,
                           node_.opt_fields)
        self.assertTrue(node.is_node(bad_node))
        self.assertTrue(bad_node == node_)

        # If the opt_fields of the left operand are not all
        # in the opt_fields of the right operand, the
        # nodes are not equal.
        del (bad_node.opt_fields["AA"])
        self.assertFalse(bad_node == node_)

        bad_node.nid = "5"
        self.assertFalse(bad_node == node_)

        # Once nid is deleted the object must no longer pass is_node.
        del (bad_node.nid)
        self.assertFalse(node.is_node(bad_node))
        self.assertTrue(bad_node != node_)
        # Disabled assertion on the string form of the node:
        # self.assertTrue(str(node_) == "nid : 3,\t" \
        #                                       + "sequence : TGCAACGTATAGACTTGTCAC,\t" \
        #                                       + "slen : None,\t" \
        #                                       + "opt_fields : []")

        fault_segment = segment.SegmentV1()
        fault_segment.add_field(line.Field("name", "3"))
        # The sequence field is deliberately not added, so the segment
        # lacks a required field.
        self.assertFalse(segment.SegmentV1.is_valid(fault_segment))

        # Write straight into _fields, storing the name under the key
        # "name_"; from_line must reject the resulting line.
        fault_line = segment.SegmentV1()
        fault_line._fields["name_"] = line.Field("name", "3")
        fault_line._fields["sequence"] = line.Field("sequence",
                                                    "TGCAACGTATAGACTTGTCAC")
        with self.assertRaises(line.InvalidLineError):
            node.Node.from_line(fault_line)

        # Insert a non-field value into the fields of a parsed segment;
        # it must not show up among the node's opt_fields.
        seg = segment.SegmentV2.from_string(
            "S\t3\t21\tTGCAACGTATAGACTTGTCAC\tRC:i:4")
        seg.fields['wrong_field'] = 42
        self.assertTrue('wrong_field' in seg.fields)
        node_ = node.Node.from_line(seg)
        self.assertFalse('wrong_field' in node_.opt_fields)

        # Both of these argument combinations must be rejected by the
        # constructor.
        with self.assertRaises(node.InvalidNodeError):
            node.Node("3", 3, "acgt")

        with self.assertRaises(node.InvalidNodeError):
            node.Node("3", "3", "acgt acgt")