Esempio n. 1
0
 def __init__(self, linkers=None):
     """Store the linker sequences this object will work with.

     linkers: iterable of linker seqs. When it is None the sequences found
         under the 'LINKERS' setting are used instead: each one is wrapped
         in a SeqItem named after its position, with FASTA-style text, and
         the items are passed through assing_kind_to_seqs as SEQITEM/'fasta'.

     The linkers are kept as a list in self.linkers.
     """
     if linkers is None:
         default_seqs = get_setting('LINKERS')
         fasta_items = []
         for index, seq in enumerate(default_seqs):
             fasta_text = '>%d\n%s\n' % (index, seq)
             fasta_items.append(SeqItem(str(index), fasta_text))
         linkers = assing_kind_to_seqs(SEQITEM, fasta_items, 'fasta')
     # list() materializes whatever iterable was given or built above.
     self.linkers = list(linkers)
Esempio n. 2
0
 def __init__(self, linkers=None):
     """Store the linker sequences this object will work with.

     linkers: iterable of linker seqs. When it is None the sequences found
         under the 'LINKERS' setting are used instead: each one is wrapped
         in a SeqItem named after its position, with FASTA-style text, and
         the items are passed through assing_kind_to_seqs as SEQITEM/'fasta'.

     The linkers are kept as a list in self.linkers.
     """
     if linkers is None:
         default_seqs = get_setting('LINKERS')
         fasta_items = []
         for index, seq in enumerate(default_seqs):
             fasta_text = '>%d\n%s\n' % (index, seq)
             fasta_items.append(SeqItem(str(index), fasta_text))
         linkers = assing_kind_to_seqs(SEQITEM, fasta_items, 'fasta')
     # list() materializes whatever iterable was given or built above.
     self.linkers = list(linkers)
Esempio n. 3
0
def _read_seqitems(fhands):
    """Return a single iterator over the seq items of all the file handles.

    For every handle, in order, the format is obtained with get_format, the
    handle is itemized with _itemize_fastx and the resulting items are
    passed through assing_kind_to_seqs as SEQITEM with that format. The
    per-file iterators are chained one after the other.
    """
    def _items_for(handle):
        # The format is looked up before the handle is itemized.
        handle_format = get_format(handle)
        raw_items = _itemize_fastx(handle)
        return assing_kind_to_seqs(SEQITEM, raw_items, handle_format)

    # A list (not a generator) so every handle is set up before returning.
    return chain.from_iterable([_items_for(handle) for handle in fhands])
Esempio n. 4
0
def _read_seqitems(fhands):
    """Return a single iterator over the seq items of all the file handles.

    For every handle, in order, the format is obtained with get_format, the
    handle is itemized with _itemize_fastx and the resulting items are
    passed through assing_kind_to_seqs as SEQITEM with that format. The
    per-file iterators are chained one after the other.
    """
    def _items_for(handle):
        # The format is looked up before the handle is itemized.
        handle_format = get_format(handle)
        raw_items = _itemize_fastx(handle)
        return assing_kind_to_seqs(SEQITEM, raw_items, handle_format)

    # A list (not a generator) so every handle is set up before returning.
    return chain.from_iterable([_items_for(handle) for handle in fhands])
Esempio n. 5
0
    def test_case_change(self):
        """ChangeCase upper-cases, lower-cases and swaps the case of seqs."""

        def cased_strs(record, action):
            # Tag the single record as a SEQRECORD, run it through a
            # ChangeCase built for the action and collect the seq strings.
            tagged = assing_kind_to_seqs(SEQRECORD, [record], None)
            transform = ChangeCase(action=action)
            return [get_str_seq(seq) for seq in transform(tagged)]

        # The first record also carries per-letter annotations.
        annotated = SeqRecord(Seq('aCCg'),
                              letter_annotations={'dummy': 'dddd'})
        assert cased_strs(annotated, UPPERCASE) == ['ACCG']

        assert cased_strs(SeqRecord(Seq('aCCg')), LOWERCASE) == ['accg']

        assert cased_strs(SeqRecord(Seq('aCCg')), SWAPCASE) == ['AccG']
Esempio n. 6
0
    def test_case_change(self):
        """ChangeCase upper-cases, lower-cases and swaps the case of seqs."""

        def cased_strs(record, action):
            # Tag the single record as a SEQRECORD, run it through a
            # ChangeCase built for the action and collect the seq strings.
            tagged = assing_kind_to_seqs(SEQRECORD, [record], None)
            transform = ChangeCase(action=action)
            return [get_str_seq(seq) for seq in transform(tagged)]

        # The first record also carries per-letter annotations.
        annotated = SeqRecord(Seq('aCCg'),
                              letter_annotations={'dummy': 'dddd'})
        assert cased_strs(annotated, UPPERCASE) == ['ACCG']

        assert cased_strs(SeqRecord(Seq('aCCg')), LOWERCASE) == ['accg']

        assert cased_strs(SeqRecord(Seq('aCCg')), SWAPCASE) == ['AccG']
Esempio n. 7
0
    def test_matching_segments(self):
        """The linker is located inside a read of a mate-pair file.

        The expected segment starts right after the 5' sequence and spans
        the length of TITANIUM_LINKER (inclusive end, hence the -1).
        """
        seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        mate_fhand = create_a_matepair_file()

        linkers = assing_kind_to_seqs(SEQRECORD, LINKERS, None)

        linker_start = len(seq_5)
        linker_end = len(seq_5 + TITANIUM_LINKER) - 1

        matcher = BlasterForFewSubjects(
            mate_fhand.name,
            linkers,
            program='blastn',
            elongate_for_global=True,
        )
        segments = matcher.get_matched_segments_for_read('seq1')
        # Only the first element of the result holds the matched regions.
        assert [(linker_start, linker_end)] == segments[0]
Esempio n. 8
0
    def test_matching_segments(self):
        """The linker is located inside a read of a mate-pair file.

        The linkers are given as FASTA SeqItems. The expected segment starts
        right after the 5' sequence and spans the length of TITANIUM_LINKER
        (inclusive end, hence the -1).
        """
        seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        mate_fhand = create_a_matepair_file()

        titan_item = SeqItem('titan', ['>titan\n', TITANIUM_LINKER + '\n'])
        flx_item = SeqItem('flx', ['>flx\n', FLX_LINKER + '\n'])
        linkers = assing_kind_to_seqs(SEQITEM, [titan_item, flx_item],
                                      'fasta')

        linker_start = len(seq_5)
        linker_end = len(seq_5 + TITANIUM_LINKER) - 1

        matcher = BlasterForFewSubjects(
            mate_fhand.name,
            linkers,
            program='blastn',
            elongate_for_global=True,
        )
        segments = matcher.get_matched_segments_for_read('seq1')
        # Only the first element of the result holds the matched regions.
        assert [(linker_start, linker_end)] == segments[0]
Esempio n. 9
0
    def test_matching_segments(self):
        """The linker is located inside a read of a mate-pair file.

        The linkers are given as FASTA SeqItems. The expected segment starts
        right after the 5' sequence and spans the length of TITANIUM_LINKER
        (inclusive end, hence the -1).
        """
        seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        mate_fhand = create_a_matepair_file()

        titan_item = SeqItem('titan', ['>titan\n', TITANIUM_LINKER + '\n'])
        flx_item = SeqItem('flx', ['>flx\n', FLX_LINKER + '\n'])
        linkers = assing_kind_to_seqs(SEQITEM, [titan_item, flx_item],
                                      'fasta')

        linker_start = len(seq_5)
        linker_end = len(seq_5 + TITANIUM_LINKER) - 1

        matcher = BlasterForFewSubjects(
            mate_fhand.name,
            linkers,
            program='blastn',
            elongate_for_global=True,
        )
        segments = matcher.get_matched_segments_for_read('seq1')
        # Only the first element of the result holds the matched regions.
        assert [(linker_start, linker_end)] == segments[0]
Esempio n. 10
0
def _read_seqitems(fhands, file_format):
    """Return a single iterator over the seq items of all the file handles.

    fhands: iterable of file handles to read, in order.
    file_format: format shared by all the files, or GUESS_FORMAT/None to
        have the format of each file guessed with guess_format.

    Only 'fasta' and the non-multiline fastq formats can be itemized; any
    other format raises NotImplementedError. The items of each file are
    passed through assing_kind_to_seqs as SEQITEM with the file's format.
    """
    seq_iters = []
    for fhand in fhands:
        # Keep the resolved format in a per-file local. Overwriting the
        # file_format argument would make the guess for the first file
        # stick to all the following ones.
        if file_format == GUESS_FORMAT or file_format is None:
            fhand_format = guess_format(fhand)
        else:
            fhand_format = file_format

        if fhand_format == 'fasta':
            seq_iter = _itemize_fasta(fhand)
        elif 'multiline' not in fhand_format and 'fastq' in fhand_format:
            seq_iter = _itemize_fastq(fhand)
        else:
            msg = 'Format not supported by the itemizers: ' + fhand_format
            raise NotImplementedError(msg)
        seq_iter = assing_kind_to_seqs(SEQITEM, seq_iter, fhand_format)
        seq_iters.append(seq_iter)
    return chain.from_iterable(seq_iters)
Esempio n. 11
0
def read_seqs(fhands,
              file_format=GUESS_FORMAT,
              out_format=None,
              prefered_seq_classes=None):
    """Return a stream of seqs read from fhands as seqitems or seqrecords.

    fhands: sequence of file handles. Only the first one is inspected when
        the input format has to be guessed.
    file_format: input format, or GUESS_FORMAT to guess it from fhands[0].
    out_format: format the seqs are meant to be written in, if known. When
        it is set (neither None nor GUESS_FORMAT) and differs from the input
        format, SEQITEM is not tried, because seqitems are incompatible with
        different input and output formats.
    prefered_seq_classes: seq classes to try, in order of preference. It
        defaults to [SEQITEM, SEQRECORD]. The caller's list is not modified.

    Raises ValueError if no seq class is left to try or if an unknown class
    is requested, and RuntimeError if every reader raised
    NotImplementedError.
    """
    if not prefered_seq_classes:
        prefered_seq_classes = [SEQITEM, SEQRECORD]
    else:
        # Work on a private copy: SEQITEM may be removed below and the
        # caller's list must not change behind its back.
        prefered_seq_classes = list(prefered_seq_classes)

    if file_format == GUESS_FORMAT:
        in_format = guess_format(fhands[0])
    else:
        in_format = file_format

    formats_differ = (out_format not in (None, GUESS_FORMAT) and
                      in_format != out_format)
    if formats_differ and SEQITEM in prefered_seq_classes:
        # seqitems is incompatible with different input and output formats
        prefered_seq_classes.remove(SEQITEM)

    if not prefered_seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in prefered_seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands, in_format)
            except NotImplementedError:
                # The itemizers do not support this format, try the next
                # preferred class.
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands, in_format)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            # str() keeps this a ValueError even for non-string classes.
            raise ValueError('Unknown class for seq: ' + str(seq_class))
    raise RuntimeError('We should not be here, fixme')
Esempio n. 12
0
def _read_seqitems(fhands):
    """Return a single iterator over the seq items of all the file handles.

    The format of each handle is obtained with get_format. 'fasta' files go
    through _itemize_fasta and non-multiline fastq files through
    _itemize_fastq; any other format raises NotImplementedError. A
    ValueError from the fastq itemizer that error_quality_disagree
    recognizes is re-raised as MalformedFile. The items of each file are
    passed through assing_kind_to_seqs as SEQITEM with the file's format.
    """
    tagged_iters = []
    for handle in fhands:
        fmt = get_format(handle)

        if fmt == 'fasta':
            items = _itemize_fasta(handle)
        elif 'fastq' not in fmt or 'multiline' in fmt:
            # Neither fasta nor a single-line fastq flavour.
            raise NotImplementedError(
                'Format not supported by the itemizers: ' + fmt)
        else:
            try:
                items = _itemize_fastq(handle)
            except ValueError as error:
                if not error_quality_disagree(error):
                    raise
                raise MalformedFile(str(error))
        tagged_iters.append(assing_kind_to_seqs(SEQITEM, items, fmt))
    return chain.from_iterable(tagged_iters)
Esempio n. 13
0
def read_seqs(fhands, file_format=GUESS_FORMAT, out_format=None,
              prefered_seq_classes=None):
    """Return a stream of seqs read from fhands as seqitems or seqrecords.

    fhands: sequence of file handles. Only the first one is inspected when
        the input format has to be guessed.
    file_format: input format, or GUESS_FORMAT to guess it from fhands[0].
    out_format: format the seqs are meant to be written in, if known. When
        it is set (neither None nor GUESS_FORMAT) and differs from the input
        format, SEQITEM is not tried, because seqitems are incompatible with
        different input and output formats.
    prefered_seq_classes: seq classes to try, in order of preference. It
        defaults to [SEQITEM, SEQRECORD]. The caller's list is not modified.

    Raises ValueError if no seq class is left to try or if an unknown class
    is requested, and RuntimeError if every reader raised
    NotImplementedError.
    """
    if not prefered_seq_classes:
        prefered_seq_classes = [SEQITEM, SEQRECORD]
    else:
        # Work on a private copy: SEQITEM may be removed below and the
        # caller's list must not change behind its back.
        prefered_seq_classes = list(prefered_seq_classes)

    if file_format == GUESS_FORMAT:
        in_format = guess_format(fhands[0])
    else:
        in_format = file_format

    formats_differ = (out_format not in (None, GUESS_FORMAT) and
                      in_format != out_format)
    if formats_differ and SEQITEM in prefered_seq_classes:
        # seqitems is incompatible with different input and output formats
        prefered_seq_classes.remove(SEQITEM)

    if not prefered_seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in prefered_seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands, in_format)
            except NotImplementedError:
                # The itemizers do not support this format, try the next
                # preferred class.
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands, in_format)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            # str() keeps this a ValueError even for non-string classes.
            raise ValueError('Unknown class for seq: ' + str(seq_class))
    raise RuntimeError('We should not be here, fixme')
Esempio n. 14
0
def read_seqs(fhands, out_format=None, prefered_seq_classes=None):
    """Return a stream of seqs read from fhands as seqitems or seqrecords.

    fhands: sequence of file handles. The input format is taken from the
        first one with get_format.
    out_format: format the seqs are meant to be written in, if known.
    prefered_seq_classes: seq classes to try, in order of preference. It
        defaults to [SEQITEM, SEQRECORD]. The caller's list is not modified.

    SEQITEM is dropped from the candidates when the output format is set
    (neither None nor GUESS_FORMAT) and differs from the input format, or
    when the input format is not fasta or one of the Sanger/Illumina fastq
    formats.

    Returns an empty list when get_format raises FileIsEmptyError.
    Raises ValueError if no seq class is left to try or if an unknown class
    is requested, and RuntimeError if every reader raised
    NotImplementedError.
    """
    if not prefered_seq_classes:
        prefered_seq_classes = [SEQITEM, SEQRECORD]
    else:
        # Work on a private copy: SEQITEM may be removed below and the
        # caller's list must not change behind its back.
        prefered_seq_classes = list(prefered_seq_classes)
    try:
        in_format = get_format(fhands[0])
    except FileIsEmptyError:
        return []

    # seqitems is incompatible with different input and output formats
    # or when in_format != a fasta or fastq.
    # The membership test guards both reasons: without it an unsupported
    # format with no SEQITEM in the list raised a stray ValueError.
    if SEQITEM in prefered_seq_classes:
        formats_differ = (out_format not in (None, GUESS_FORMAT) and
                          in_format != out_format)
        itemizable = (('fasta',) + SANGER_FASTQ_FORMATS +
                      ILLUMINA_FASTQ_FORMATS)
        if formats_differ or in_format not in itemizable:
            prefered_seq_classes.remove(SEQITEM)

    if not prefered_seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in prefered_seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands)
            except NotImplementedError:
                # The itemizers do not support this format, try the next
                # preferred class.
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            # str() keeps this a ValueError even for non-string classes.
            raise ValueError('Unknown class for seq: ' + str(seq_class))
    raise RuntimeError('We should not be here, fixme')
Esempio n. 15
0
def read_seqs(fhands, out_format=None, prefered_seq_classes=None):
    """Return a stream of seqs read from fhands as seqitems or seqrecords.

    fhands: sequence of file handles. The input format is taken from the
        first one with get_format.
    out_format: format the seqs are meant to be written in, if known.
    prefered_seq_classes: seq classes to try, in order of preference. It
        defaults to [SEQITEM, SEQRECORD]. The caller's list is not modified.

    SEQITEM is dropped from the candidates when the output format is set
    (neither None nor GUESS_FORMAT) and differs from the input format, or
    when the input format is not fasta or one of the Sanger/Illumina fastq
    formats.

    Returns an empty list when get_format raises FileIsEmptyError.
    Raises ValueError if no seq class is left to try or if an unknown class
    is requested, and RuntimeError if every reader raised
    NotImplementedError.
    """
    if not prefered_seq_classes:
        prefered_seq_classes = [SEQITEM, SEQRECORD]
    else:
        # Work on a private copy: SEQITEM may be removed below and the
        # caller's list must not change behind its back.
        prefered_seq_classes = list(prefered_seq_classes)
    try:
        in_format = get_format(fhands[0])
    except FileIsEmptyError:
        return []

    # seqitems is incompatible with different input and output formats
    # or when in_format != a fasta or fastq.
    # The membership test guards both reasons: without it an unsupported
    # format with no SEQITEM in the list raised a stray ValueError.
    if SEQITEM in prefered_seq_classes:
        formats_differ = (out_format not in (None, GUESS_FORMAT) and
                          in_format != out_format)
        itemizable = (('fasta',) + SANGER_FASTQ_FORMATS +
                      ILLUMINA_FASTQ_FORMATS)
        if formats_differ or in_format not in itemizable:
            prefered_seq_classes.remove(SEQITEM)

    if not prefered_seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in prefered_seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands)
            except NotImplementedError:
                # The itemizers do not support this format, try the next
                # preferred class.
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            # str() keeps this a ValueError even for non-string classes.
            raise ValueError('Unknown class for seq: ' + str(seq_class))
    raise RuntimeError('We should not be here, fixme')
Esempio n. 16
0
def _read_seqitems(fhands, file_format):
    """Return a single iterator over the seq items of all the file handles.

    fhands: iterable of file handles to read, in order.
    file_format: format shared by all the files, or GUESS_FORMAT/None to
        have the format of each file guessed with guess_format.

    Only 'fasta' and the non-multiline fastq formats can be itemized; any
    other format raises NotImplementedError. A ValueError from the fastq
    itemizer that error_quality_disagree recognizes is re-raised as
    MalformedFile. The items of each file are passed through
    assing_kind_to_seqs as SEQITEM with the file's format.
    """
    seq_iters = []
    for fhand in fhands:
        # Keep the resolved format in a per-file local. Overwriting the
        # file_format argument would make the guess for the first file
        # stick to all the following ones.
        if file_format == GUESS_FORMAT or file_format is None:
            fhand_format = guess_format(fhand)
        else:
            fhand_format = file_format

        if fhand_format == 'fasta':
            seq_iter = _itemize_fasta(fhand)
        elif 'multiline' not in fhand_format and 'fastq' in fhand_format:
            try:
                seq_iter = _itemize_fastq(fhand)
            except ValueError as error:
                if error_quality_disagree(error):
                    raise MalformedFile(str(error))
                raise
        else:
            msg = 'Format not supported by the itemizers: ' + fhand_format
            raise NotImplementedError(msg)
        seq_iter = assing_kind_to_seqs(SEQITEM, seq_iter, fhand_format)
        seq_iters.append(seq_iter)
    return chain.from_iterable(seq_iters)