Example #1
0
    def test_fastq(self):
        'It guesses the format for the solexa and illumina fastq'
        # Pieces shared by the fastq records built below.
        name = 'HWI-EAS209_0006_FC706VJ:5:58:5894:21141#ATCACG/1\n'
        seq = 'TTAATTGGTAAATAAATCTCCTAATAGCTTAGATNTTACCTTNNNNNNNNNNTAGTTTCT\n'
        # The backslash is escaped explicitly: a bare "\]" is an invalid
        # escape sequence and newer Python versions warn about it. The
        # resulting string is the same.
        illumina_qual = ('efcfffffcfeefffcffffffddf`feed]`]_Ba_^__'
                         '[YBBBBBBBBBBRTT\\]][]\n')
        sanger_qual = ('000000000000000000000000000000'
                       '000000000000000000000000000000\n')

        # one line per sequence and per quality
        fhand = StringIO('@' + name + seq + '+' + name + illumina_qual)
        assert get_format(fhand) == 'fastq-illumina'

        # sequence and quality split in two lines, record given twice
        txt = ('@' + name + seq + seq +
               '+' + name + illumina_qual + illumina_qual)
        fhand = StringIO(txt + txt)
        assert get_format(fhand) == 'fastq-illumina'

        # this input is expected to raise UndecidedFastqVersionError
        fhand = StringIO('@HWI-EAS209\n@')
        try:
            assert get_format(fhand) == 'fasta'
            self.fail('UndecidedFastqVersionError expected')
        except UndecidedFastqVersionError:
            pass

        # sanger
        fhand = StringIO('@' + name + seq + '+' + name + sanger_qual)
        assert get_format(fhand) == 'fastq'
    def test_fastq(self):
        'It guesses the format for the solexa and illumina fastq'
        # Pieces shared by the fastq records built below.
        name = 'HWI-EAS209_0006_FC706VJ:5:58:5894:21141#ATCACG/1\n'
        seq = 'TTAATTGGTAAATAAATCTCCTAATAGCTTAGATNTTACCTTNNNNNNNNNNTAGTTTCT\n'
        # The backslash is escaped explicitly: a bare "\]" is an invalid
        # escape sequence and newer Python versions warn about it. The
        # resulting string is the same.
        illumina_qual = ('efcfffffcfeefffcffffffddf`feed]`]_Ba_^__'
                         '[YBBBBBBBBBBRTT\\]][]\n')
        sanger_qual = ('000000000000000000000000000000'
                       '000000000000000000000000000000\n')

        # one line per sequence and per quality
        fhand = StringIO('@' + name + seq + '+' + name + illumina_qual)
        assert get_format(fhand) == 'fastq-illumina'

        # sequence and quality split in two lines, record given twice
        txt = ('@' + name + seq + seq +
               '+' + name + illumina_qual + illumina_qual)
        fhand = StringIO(txt + txt)
        assert get_format(fhand) == 'fastq-illumina'

        # this input is expected to raise UndecidedFastqVersionError
        fhand = StringIO('@HWI-EAS209\n@')
        try:
            assert get_format(fhand) == 'fasta'
            self.fail('UndecidedFastqVersionError expected')
        except UndecidedFastqVersionError:
            pass

        # sanger
        fhand = StringIO('@' + name + seq + '+' + name + sanger_qual)
        assert get_format(fhand) == 'fastq'
Example #3
0
 def test_unkown(self):
     'It checks that an unrecognized content raises UnknownFormatError'
     unknown_fhand = StringIO('xseq\nACTC\n')
     try:
         get_format(unknown_fhand)
     except UnknownFormatError:
         # this is the expected outcome
         return
     self.fail('UnknownFormatError expected')
 def test_empty_file(self):
     'It checks that an empty file raises FileIsEmptyError'
     empty_fhand = StringIO()
     try:
         get_format(empty_fhand)
     except FileIsEmptyError:
         # this is the expected outcome
         return
     self.fail('FileIsEmptyError expected')
Example #5
0
 def test_empty_file(self):
     'It checks that an empty file raises FileIsEmptyError'
     empty_fhand = StringIO()
     try:
         get_format(empty_fhand)
     except FileIsEmptyError:
         pass
     else:
         # no error means that a format was guessed for an empty input
         self.fail('FileIsEmptyError expected')
 def test_unkown(self):
     'It checks that an unrecognized content raises UnknownFormatError'
     unknown_fhand = StringIO('xseq\nACTC\n')
     try:
         get_format(unknown_fhand)
     except UnknownFormatError:
         pass
     else:
         # no error means that a format was guessed for this content
         self.fail('UnknownFormatError expected')
 def test_long_illumina(self):
     'The qualities look like illumina, but the reads are too long'
     read_length = 400
     record = ''.join(['@read\n',
                       'T' * read_length + '\n',
                       '+\n',
                       '@' * read_length + '\n'])
     long_fhand = StringIO(record)
     try:
         get_format(long_fhand)
     except UndecidedFastqVersionError:
         pass
     else:
         self.fail('UndecidedFastqVersionError expected')
Example #8
0
 def test_long_illumina(self):
     'The qualities look like illumina, but the reads are too long'
     read_length = 400
     record = ''.join(['@read\n',
                       'T' * read_length + '\n',
                       '+\n',
                       '@' * read_length + '\n'])
     long_fhand = StringIO(record)
     try:
         get_format(long_fhand)
     except UndecidedFastqVersionError:
         # this is the expected outcome
         return
     self.fail('UndecidedFastqVersionError expected')
Example #9
0
def _read_seqitems(fhands):
    'it returns an iterator of seq items (tuples of name and chunk)'
    seq_iters = []
    for fhand in fhands:
        file_format = get_format(fhand)
        seq_iter = _itemize_fastx(fhand)
        seq_iter = assing_kind_to_seqs(SEQITEM, seq_iter, file_format)
        seq_iters.append(seq_iter)
    return chain.from_iterable(seq_iters)
Example #10
0
def _read_seqitems(fhands):
    'it returns an iterator of seq items (tuples of name and chunk)'
    seq_iters = []
    for fhand in fhands:
        file_format = get_format(fhand)
        seq_iter = _itemize_fastx(fhand)
        seq_iter = assing_kind_to_seqs(SEQITEM, seq_iter, file_format)
        seq_iters.append(seq_iter)
    return chain.from_iterable(seq_iters)
    def test_fasta(self):
        'It guesses the fasta and qual formats'
        several_quals = (">seq1\n30 30 30 30 30 30 30 30\n"
                         ">seq2\n30 30 30 30 30 30 30 30\n"
                         ">seq3\n30 30 30 30 30 30 30 30\n")
        # pairs of file content and the format expected for it
        cases = [
            ('>seq\nACTC\n', 'fasta'),
            # multiline fasta
            ('>seq\nACTC\nACTG\n>seq2\nACTG\n', 'fasta'),
            # qual
            ('>seq\n10 20\n', 'qual'),
            (several_quals, 'qual'),
        ]
        for content, expected_format in cases:
            fhand = StringIO(content)
            assert get_format(fhand) == expected_format
Example #12
0
    def test_get_format_stringio(self):
        "It checks that the format guessed for a StringIO is in the inventory"
        seq_fhand = StringIO('>seq\natgctacgacta\n')

        # The inventory key checked here is the object id plus the sha224
        # digest of the first 100 characters of the content
        content_digest = hashlib.sha224(seq_fhand.getvalue()[:100]).hexdigest()
        inventory_key = (id(seq_fhand), content_digest)

        guessed_format = get_format(seq_fhand)
        assert FILEFORMAT_INVENTORY[inventory_key] == guessed_format
Example #13
0
    def test_fasta(self):
        'It guesses the fasta and qual formats'
        several_quals = (">seq1\n30 30 30 30 30 30 30 30\n"
                         ">seq2\n30 30 30 30 30 30 30 30\n"
                         ">seq3\n30 30 30 30 30 30 30 30\n")
        # pairs of file content and the format expected for it
        cases = [
            ('>seq\nACTC\n', 'fasta'),
            # multiline fasta
            ('>seq\nACTC\nACTG\n>seq2\nACTG\n', 'fasta'),
            # qual
            ('>seq\n10 20\n', 'qual'),
            (several_quals, 'qual'),
        ]
        for content, expected_format in cases:
            fhand = StringIO(content)
            assert get_format(fhand) == expected_format
Example #14
0
    def test_get_format_stringio(self):
        "It checks that the format guessed for a StringIO is in the inventory"
        seq_fhand = StringIO('>seq\natgctacgacta\n')

        # The inventory key checked here is the object id plus the sha224
        # digest of the first 100 characters of the content
        content_digest = hashlib.sha224(seq_fhand.getvalue()[:100]).hexdigest()
        inventory_key = (id(seq_fhand), content_digest)

        guessed_format = get_format(seq_fhand)
        assert FILEFORMAT_INVENTORY[inventory_key] == guessed_format
Example #15
0
    def test_get_format_fhand(self):
        "It checks the get/set format functions with temporary files"
        tmp_fhand = NamedTemporaryFile()
        tmp_fhand.write('>seq\natgctacgacta\n')
        tmp_fhand.flush()
        # for a file the inventory key checked here is the id plus the name
        inventory_key = (id(tmp_fhand), tmp_fhand.name)

        # after the first call the format should be in the inventory
        guessed_format = get_format(tmp_fhand)
        assert FILEFORMAT_INVENTORY[inventory_key] == guessed_format
        inventory_size = len(FILEFORMAT_INVENTORY)

        # a second call should not add a new entry
        guessed_format = get_format(tmp_fhand)
        assert FILEFORMAT_INVENTORY[inventory_key] == guessed_format
        assert len(FILEFORMAT_INVENTORY) == inventory_size

        # a format set by hand should be the one returned afterwards
        tmp_fhand = NamedTemporaryFile()
        set_format(tmp_fhand, 'fasta')

        assert 'fasta' == get_format(tmp_fhand)
Example #16
0
    def test_get_format_fhand(self):
        "It checks the get/set format functions with temporary files"
        tmp_fhand = NamedTemporaryFile()
        tmp_fhand.write('>seq\natgctacgacta\n')
        tmp_fhand.flush()
        # for a file the inventory key checked here is the id plus the name
        inventory_key = (id(tmp_fhand), tmp_fhand.name)

        # after the first call the format should be in the inventory
        guessed_format = get_format(tmp_fhand)
        assert FILEFORMAT_INVENTORY[inventory_key] == guessed_format
        inventory_size = len(FILEFORMAT_INVENTORY)

        # a second call should not add a new entry
        guessed_format = get_format(tmp_fhand)
        assert FILEFORMAT_INVENTORY[inventory_key] == guessed_format
        assert len(FILEFORMAT_INVENTORY) == inventory_size

        # a format set by hand should be the one returned afterwards
        tmp_fhand = NamedTemporaryFile()
        set_format(tmp_fhand, 'fasta')

        assert 'fasta' == get_format(tmp_fhand)
Example #17
0
def parse_basic_args(parser):
    '''It parses the command line and prepares the input and output fhands.

    Every input fhand is passed through wrap_in_buffered_reader and
    uncompress_if_required, and its format is either guessed or set to the
    in_format given in the command line. The output fhand, or every fhand
    when the output is a list, is passed through compress_fhand with the
    requested compression. A RuntimeError raised by compress_fhand is
    reported with parser.error.
    '''
    parsed_args = parser.parse_args()
    # we have to wrap the file in a BufferedReader to allow peeking into stdin
    wrapped_fhands = []
    # if input is stdin it will be a fhand not a list of fhands.
    # we have to convert to a list
    in_fhands = parsed_args.input
    if not isinstance(in_fhands, list):
        in_fhands = [in_fhands]
    for fhand in in_fhands:
        fhand = wrap_in_buffered_reader(fhand)
        fhand = uncompress_if_required(fhand)
        wrapped_fhands.append(fhand)

    # We have to add the one_line to the fastq files in order to get the
    # speed improvements of the seqitems
    in_format = parsed_args.in_format
    if in_format == GUESS_FORMAT:
        # the returned format is not used here, only the call matters
        for wrapped_fhand in wrapped_fhands:
            get_format(wrapped_fhand)
    else:
        for wrapped_fhand in wrapped_fhands:
            set_format(wrapped_fhand, in_format)

    out_fhand = getattr(parsed_args, OUTFILE)

    comp_kind = get_requested_compression(parsed_args)
    if isinstance(out_fhand, list):
        new_out_fhands = []
        for out_f in out_fhand:
            try:
                out_f = compress_fhand(out_f, compression_kind=comp_kind)
            # "except ... as" works in python 2.6+ and in python 3, the
            # older "except RuntimeError, error" form is python 2 only
            except RuntimeError as error:
                parser.error(error)

            new_out_fhands.append(out_f)
        out_fhand = new_out_fhands
Example #18
0
def parse_basic_args(parser):
    '''It parses the command line and prepares the input and output fhands.

    Every input fhand is passed through wrap_in_buffered_reader and
    uncompress_if_required, and its format is either guessed or set to the
    in_format given in the command line. The output fhand, or every fhand
    when the output is a list, is passed through compress_fhand with the
    requested compression. A RuntimeError raised by compress_fhand is
    reported with parser.error.
    '''
    parsed_args = parser.parse_args()
    # we have to wrap the file in a BufferedReader to allow peeking into stdin
    wrapped_fhands = []
    # if input is stdin it will be a fhand not a list of fhands.
    # we have to convert to a list
    in_fhands = parsed_args.input
    if not isinstance(in_fhands, list):
        in_fhands = [in_fhands]
    for fhand in in_fhands:
        fhand = wrap_in_buffered_reader(fhand)
        fhand = uncompress_if_required(fhand)
        wrapped_fhands.append(fhand)

    # We have to add the one_line to the fastq files in order to get the
    # speed improvements of the seqitems
    in_format = parsed_args.in_format
    if in_format == GUESS_FORMAT:
        # the returned format is not used here, only the call matters
        for wrapped_fhand in wrapped_fhands:
            get_format(wrapped_fhand)
    else:
        for wrapped_fhand in wrapped_fhands:
            set_format(wrapped_fhand, in_format)

    out_fhand = getattr(parsed_args, OUTFILE)

    comp_kind = get_requested_compression(parsed_args)
    if isinstance(out_fhand, list):
        new_out_fhands = []
        for out_f in out_fhand:
            try:
                out_f = compress_fhand(out_f, compression_kind=comp_kind)
            # "except ... as" works in python 2.6+ and in python 3, the
            # older "except RuntimeError, error" form is python 2 only
            except RuntimeError as error:
                parser.error(error)

            new_out_fhands.append(out_f)
        out_fhand = new_out_fhands
Example #19
0
def sort_by_position_in_ref(in_fhand, index_fpath, directory=None,
                            tempdir=None):
    '''It yields the reads of in_fhand as seqitems after mapping them.

    The file behind in_fhand.name is mapped with bowtie2 against the index
    in index_fpath, the mapping is written to a temporary file with
    map_process_to_sortedbam and every aligned read found in it is yielded
    after converting it with alignedread_to_seqitem.

    The directory argument is accepted, but it is not used.
    '''
    # NOTE(review): the original comment said "changed to bwa mem from
    # bowtie, test doesn't work well, check it out", but the mapping below
    # is still done with bowtie2.
    in_fpath = in_fhand.name
    # The file is opened only to guess its format, so it is closed as soon
    # as the format is known instead of leaking the handle
    with open(in_fpath) as format_fhand:
        file_format = get_format(format_fhand)
    extra_params = ['--very-fast']
    if 'fasta' in file_format:
        # -f is appended to the bowtie2 parameters for fasta inputs
        extra_params.append('-f')
    bowtie2_process = map_with_bowtie2(index_fpath, paired_fpaths=None,
                                       unpaired_fpath=in_fpath,
                                       extra_params=extra_params)
    # out_fhand has to stay referenced while the reads are being yielded
    out_fhand = NamedTemporaryFile()
    map_process_to_sortedbam(bowtie2_process, out_fhand.name, tempdir=tempdir)
    samfile = AlignmentFile(out_fhand.name)
    for aligned_read in samfile:
        yield alignedread_to_seqitem(aligned_read)
Example #20
0
def sort_by_position_in_ref(in_fhand, index_fpath, directory=None,
                            tempdir=None):
    '''It yields the reads of in_fhand as seqitems after mapping them.

    The file behind in_fhand.name is mapped with bowtie2 against the index
    in index_fpath, the mapping is written to a temporary file with
    map_process_to_sortedbam and every aligned read found in it is yielded
    after converting it with alignedread_to_seqitem.

    The directory argument is accepted, but it is not used.
    '''
    # NOTE(review): the original comment said "changed to bwa mem from
    # bowtie, test doesn't work well, check it out", but the mapping below
    # is still done with bowtie2.
    in_fpath = in_fhand.name
    # The file is opened only to guess its format, so it is closed as soon
    # as the format is known instead of leaking the handle
    with open(in_fpath) as format_fhand:
        file_format = get_format(format_fhand)
    extra_params = ['--very-fast']
    if 'fasta' in file_format:
        # -f is appended to the bowtie2 parameters for fasta inputs
        extra_params.append('-f')
    bowtie2_process = map_with_bowtie2(index_fpath, paired_fpaths=None,
                                       unpaired_fpath=in_fpath,
                                       extra_params=extra_params)
    # out_fhand has to stay referenced while the reads are being yielded
    out_fhand = NamedTemporaryFile()
    map_process_to_sortedbam(bowtie2_process, out_fhand.name, tempdir=tempdir)
    samfile = pysam.Samfile(out_fhand.name)
    for aligned_read in samfile:
        yield alignedread_to_seqitem(aligned_read)
Example #21
0
def _read_seqrecords(fhands):
    'It returns an iterator of seqrecords'
    seq_iters = []
    for fhand in fhands:
        fmt = get_format(fhand)

        if fmt in ('fasta', 'qual') or 'fastq' in fmt:
            title = title2ids
        if fmt == 'fasta':
            seq_iter = FastaIterator(fhand, title2ids=title)
        elif fmt == 'qual':
            seq_iter = QualPhredIterator(fhand, title2ids=title)
        elif fmt == 'fastq' or fmt == 'fastq-sanger':
            seq_iter = FastqPhredIterator(fhand, title2ids=title)
        elif fmt == 'fastq-solexa':
            seq_iter = FastqSolexaIterator(fhand, title2ids=title)
        elif fmt == 'fastq-illumina':
            seq_iter = FastqIlluminaIterator(fhand, title2ids=title)
        else:
            seq_iter = parse_into_seqrecs(fhand, fmt)
        seq_iters.append(seq_iter)
    return chain.from_iterable(seq_iters)
Example #22
0
def _read_seqrecords(fhands):
    'It returns an iterator of seqrecords'
    seq_iters = []
    for fhand in fhands:
        fmt = get_format(fhand)

        if fmt in ('fasta', 'qual') or 'fastq' in fmt:
            title = title2ids
        if fmt == 'fasta':
            seq_iter = FastaIterator(fhand, title2ids=title)
        elif fmt == 'qual':
            seq_iter = QualPhredIterator(fhand, title2ids=title)
        elif fmt == 'fastq' or fmt == 'fastq-sanger':
            seq_iter = FastqPhredIterator(fhand, title2ids=title)
        elif fmt == 'fastq-solexa':
            seq_iter = FastqSolexaIterator(fhand, title2ids=title)
        elif fmt == 'fastq-illumina':
            seq_iter = FastqIlluminaIterator(fhand, title2ids=title)
        else:
            seq_iter = parse_into_seqrecs(fhand, fmt)
        seq_iters.append(seq_iter)
    return chain.from_iterable(seq_iters)
Example #23
0
def seqio(in_fhands, out_fhand, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    When there is only one input and it is already in out_format, the input
    is copied into out_fhand, or symlinked to it if copy_if_same_format is
    False. Otherwise the inputs are read as seqrecords and written in
    out_format.

    It raises IncompatibleFormatError if out_format is not one of the
    supported output formats or if the output requires qualities that are
    not available, and MalformedFile when error_quality_disagree recognizes
    the ValueError raised while writing.
    '''
    if out_format not in get_setting('SUPPORTED_OUTPUT_FORMATS'):
        raise IncompatibleFormatError("This output format is not supported")

    # get_format is called for every input, although only the format of
    # the first one is compared below
    in_formats = [get_format(fhand) for fhand in in_fhands]

    if len(in_fhands) == 1 and in_formats[0] == out_format:
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhand)
        else:
            rel_symlink(in_fhands[0].name, out_fhand.name)
    else:
        seqs = _read_seqrecords(in_fhands)
        try:
            write_seqrecs(seqs, out_fhand, out_format)
        # "except ... as" works in python 2.6+ and in python 3, the older
        # "except ValueError, error" form is python 2 only
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            if 'No suitable quality scores' in str(error):
                msg = 'No qualities available to write output file'
                raise IncompatibleFormatError(msg)
            # any other ValueError is not ours to translate
            raise
Example #24
0
def seqio(in_fhands, out_fhand, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    When there is only one input and it is already in out_format, the input
    is copied into out_fhand, or symlinked to it if copy_if_same_format is
    False. Otherwise the inputs are read as seqrecords and written in
    out_format.

    It raises IncompatibleFormatError if out_format is not one of the
    supported output formats or if the output requires qualities that are
    not available, and MalformedFile when error_quality_disagree recognizes
    the ValueError raised while writing.
    '''
    if out_format not in get_setting('SUPPORTED_OUTPUT_FORMATS'):
        raise IncompatibleFormatError("This output format is not supported")

    # get_format is called for every input, although only the format of
    # the first one is compared below
    in_formats = [get_format(fhand) for fhand in in_fhands]

    if len(in_fhands) == 1 and in_formats[0] == out_format:
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhand)
        else:
            rel_symlink(in_fhands[0].name, out_fhand.name)
    else:
        seqs = _read_seqrecords(in_fhands)
        try:
            write_seqrecs(seqs, out_fhand, out_format)
        # "except ... as" works in python 2.6+ and in python 3, the older
        # "except ValueError, error" form is python 2 only
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            if 'No suitable quality scores' in str(error):
                msg = 'No qualities available to write output file'
                raise IncompatibleFormatError(msg)
            # any other ValueError is not ours to translate
            raise
Example #25
0
def read_seqs(fhands, out_format=None, prefered_seq_classes=None):
    '''It returns a stream of seqs in different codings: seqrecords, seqitems...

    The seq classes in prefered_seq_classes are tried in order, by default
    SEQITEM first and SEQRECORD afterwards. SEQITEM is discarded when an
    out_format different from the input format is requested or when the
    input format is not fasta or one of the sanger or illumina fastq
    formats.

    The format is guessed from the first fhand; if that file is empty an
    empty list is returned. It raises ValueError if no seq class is left to
    try or if an unknown seq class is given.
    '''
    # Work on a copy: the list given by the caller must not be modified
    if prefered_seq_classes:
        seq_classes = list(prefered_seq_classes)
    else:
        seq_classes = [SEQITEM, SEQRECORD]
    try:
        in_format = get_format(fhands[0])
    except FileIsEmptyError:
        return []
    # seqitems is incompatible with different input and output formats
    # or when in_format != a fasta or fastq
    format_changes = (out_format not in (None, GUESS_FORMAT) and
                      in_format != out_format)
    seqitem_formats = (('fasta',) + SANGER_FASTQ_FORMATS +
                       ILLUMINA_FASTQ_FORMATS)
    if format_changes or in_format not in seqitem_formats:
        # Filtering does not fail when SEQITEM was not requested at all,
        # unlike list.index, which raised a ValueError in that case
        seq_classes = [klass for klass in seq_classes if klass != SEQITEM]

    if not seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands)
            except NotImplementedError:
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            raise ValueError('Unknown class for seq: ' + seq_class)
    raise RuntimeError('We should not be here, fixme')
Example #26
0
def read_seqs(fhands, out_format=None, prefered_seq_classes=None):
    '''It returns a stream of seqs in different codings: seqrecords, seqitems...

    The seq classes in prefered_seq_classes are tried in order, by default
    SEQITEM first and SEQRECORD afterwards. SEQITEM is discarded when an
    out_format different from the input format is requested or when the
    input format is not fasta or one of the sanger or illumina fastq
    formats.

    The format is guessed from the first fhand; if that file is empty an
    empty list is returned. It raises ValueError if no seq class is left to
    try or if an unknown seq class is given.
    '''
    # Work on a copy: the list given by the caller must not be modified
    if prefered_seq_classes:
        seq_classes = list(prefered_seq_classes)
    else:
        seq_classes = [SEQITEM, SEQRECORD]
    try:
        in_format = get_format(fhands[0])
    except FileIsEmptyError:
        return []
    # seqitems is incompatible with different input and output formats
    # or when in_format != a fasta or fastq
    format_changes = (out_format not in (None, GUESS_FORMAT) and
                      in_format != out_format)
    seqitem_formats = (('fasta',) + SANGER_FASTQ_FORMATS +
                       ILLUMINA_FASTQ_FORMATS)
    if format_changes or in_format not in seqitem_formats:
        # Filtering does not fail when SEQITEM was not requested at all,
        # unlike list.index, which raised a ValueError in that case
        seq_classes = [klass for klass in seq_classes if klass != SEQITEM]

    if not seq_classes:
        msg = 'No valid seq class left or prefered'
        raise ValueError(msg)

    for seq_class in seq_classes:
        if seq_class == SEQITEM:
            try:
                return _read_seqitems(fhands)
            except NotImplementedError:
                continue
        elif seq_class == SEQRECORD:
            try:
                seqs = _read_seqrecords(fhands)
                return assing_kind_to_seqs(SEQRECORD, seqs, None)
            except NotImplementedError:
                continue
        else:
            raise ValueError('Unknown class for seq: ' + seq_class)
    raise RuntimeError('We should not be here, fixme')
Example #27
0
    def test_with_long_desc(self):
        fhand = StringIO('''>comp27222_c1_seq1 len=4926 path=[89166356:0-46 89167522:47-85 89315292:86-121 89170132:122-176 89377211:177-217 89377235:218-244 89172846:245-247 89172856:248-251 89173028:252-276 89174386:277-292 89174684:293-506 89377352:507-582 89183669:583-587 89183821:588-613 89184868:614-644 89185624:645-719 89187914:720-723 89187935:724-870 89191280:871-887 89377494:888-907 89191517:908-927 89193046:928-1071 89198507:1072-1109 89199632:1110-1170 89201544:1171-1194 89202607:1195-1247 89377606:1248-1252 89377611:1253-1591 89215759:1592-1606 89215815:1607-1636 89216359:1637-1664 89377693:1665-1678 88727916:1679-2152 88743802:2153-2171 88744738:2172-2623 88759485:2624-2648 88759762:2649-2953 88769199:2954-2971 88769596:2972-3657 88791809:3658-3665 88792014:3666-3723 88793720:3724-3731 88794381:3732-3812 88799277:3813-3813 88799328:3814-3996 88807093:3997-3999 88807177:4000-4215 88813164:4216-4246 88814188:4247-4287 88815355:4288-4308 88816198:4309-4352 88817845:4353-4369 88818294:4370-4403 88818879:4404-4465 88821150:4466-4469 88821188:4470-4925]
GAAGGATCGATCGGCCTCGGCGGTGTTCCCAAAAATCTAAGAGCGTTTACTCCAAGCTTC''')
        get_format(fhand)
Example #28
0
            try:
                out_f = compress_fhand(out_f, compression_kind=comp_kind)
            except RuntimeError, error:
                parser.error(error)

            new_out_fhands.append(out_f)
        out_fhand = new_out_fhands
    else:
        try:
            out_fhand = compress_fhand(out_fhand, compression_kind=comp_kind)
        except RuntimeError, error:
            parser.error(error)

    # The default output format is the same as the first file
    if 'fastq' in in_format or in_format == GUESS_FORMAT:
        out_format = get_format(wrapped_fhands[0])
    else:
        out_format = in_format

    # The original fhands should be stored, because otherwise they would be
    # closed
    args = {
        'out_fhand': out_fhand,
        'in_fhands': wrapped_fhands,
        'out_format': out_format,
        'original_in_fhands': in_fhands
    }
    return args, parsed_args


def parse_basic_parallel_args(parser):
    def test_with_long_desc(self):
        fhand = StringIO('''>comp27222_c1_seq1 len=4926 path=[89166356:0-46 89167522:47-85 89315292:86-121 89170132:122-176 89377211:177-217 89377235:218-244 89172846:245-247 89172856:248-251 89173028:252-276 89174386:277-292 89174684:293-506 89377352:507-582 89183669:583-587 89183821:588-613 89184868:614-644 89185624:645-719 89187914:720-723 89187935:724-870 89191280:871-887 89377494:888-907 89191517:908-927 89193046:928-1071 89198507:1072-1109 89199632:1110-1170 89201544:1171-1194 89202607:1195-1247 89377606:1248-1252 89377611:1253-1591 89215759:1592-1606 89215815:1607-1636 89216359:1637-1664 89377693:1665-1678 88727916:1679-2152 88743802:2153-2171 88744738:2172-2623 88759485:2624-2648 88759762:2649-2953 88769199:2954-2971 88769596:2972-3657 88791809:3658-3665 88792014:3666-3723 88793720:3724-3731 88794381:3732-3812 88799277:3813-3813 88799328:3814-3996 88807093:3997-3999 88807177:4000-4215 88813164:4216-4246 88814188:4247-4287 88815355:4288-4308 88816198:4309-4352 88817845:4353-4369 88818294:4370-4403 88818879:4404-4465 88821150:4466-4469 88821188:4470-4925]
GAAGGATCGATCGGCCTCGGCGGTGTTCCCAAAAATCTAAGAGCGTTTACTCCAAGCTTC''')
        get_format(fhand)
Example #30
0
            try:
                out_f = compress_fhand(out_f, compression_kind=comp_kind)
            except RuntimeError, error:
                parser.error(error)

            new_out_fhands.append(out_f)
        out_fhand = new_out_fhands
    else:
        try:
            out_fhand = compress_fhand(out_fhand, compression_kind=comp_kind)
        except RuntimeError, error:
            parser.error(error)

    # The default output format is the same as the first file
    if 'fastq' in in_format or in_format == GUESS_FORMAT:
        out_format = get_format(wrapped_fhands[0])
    else:
        out_format = in_format

    # The original fhands should be stored, because otherwise they would be
    # closed
    args = {'out_fhand': out_fhand, 'in_fhands': wrapped_fhands,
            'out_format': out_format, 'original_in_fhands': in_fhands}
    return args, parsed_args


def parse_basic_parallel_args(parser):
    'It parses the basic arguments and adds the number of processes to them'
    basic_result = parse_basic_args(parser)
    args, parsed_args = basic_result
    # the processes option is copied from the parsed command line
    args.update({'processes': parsed_args.processes})
    return args, parsed_args