Beispiel #1
0
def seqio(in_fhands, out_fhands, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    in_fhands -- list of input file handles; the format of each one is
        guessed with guess_format.
    out_fhands -- list of output file handles.
    out_format -- name of the format to write.
    copy_if_same_format -- when the single input is already in out_format,
        copy its content (True) or create a relative symlink (False).

    A ValueError for which error_quality_disagree is true is re-raised as
    MalformedFile; any other ValueError is propagated unchanged.
    '''

    in_formats = [guess_format(fhand) for fhand in in_fhands]

    # Single input already in the requested format: no conversion needed.
    # The handle must expose a 'name' attribute to take this shortcut.
    if (len(in_formats) == 1 and in_formats[0] == out_format and
        hasattr(in_fhands[0], 'name')):
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhands[0])
        else:
            rel_symlink(in_fhands[0].name, out_fhands[0].name)

    # One input, one output: let SeqIO do the whole conversion.
    elif len(in_fhands) == 1 and len(out_fhands) == 1:
        try:
            SeqIO.convert(in_fhands[0], in_formats[0], out_fhands[0],
                          out_format)
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
    # One input, two outputs: every record goes to the first handle in
    # out_format and to the second handle as 'qual'.
    elif (len(in_fhands) == 1 and len(out_fhands) == 2 and
          out_format == 'fasta'):
        try:
            for seq in read_seqrecords([in_fhands[0]]):
                SeqIO.write([seq], out_fhands[0], out_format)
                SeqIO.write([seq], out_fhands[1], 'qual')
        # 'as' form (instead of the Python-2-only comma form) keeps this
        # handler consistent with the one above and valid on Python 3.
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
Beispiel #2
0
def seqio(in_fhands, out_fhand, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    in_fhands -- list of input file handles.
    out_fhand -- output file handle.
    out_format -- name of the format to write; it must be one of the
        SUPPORTED_OUTPUT_FORMATS setting.
    copy_if_same_format -- when the single input is already in out_format,
        copy its content (True) or create a relative symlink (False).

    It raises IncompatibleFormatError if out_format is not supported or if
    the writer complains that there are no suitable quality scores, and
    MalformedFile if error_quality_disagree is true for the ValueError
    raised while writing. Any other ValueError is propagated unchanged.
    '''
    if out_format not in get_setting('SUPPORTED_OUTPUT_FORMATS'):
        raise IncompatibleFormatError("This output format is not supported")

    in_formats = [remove_multiline(guess_format(fhand)) for fhand in in_fhands]

    # Single input already in the requested format: no conversion needed.
    if len(in_fhands) == 1 and in_formats[0] == out_format:
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhand)
        else:
            rel_symlink(in_fhands[0].name, out_fhand.name)
    else:
        seqs = _read_seqrecords(in_fhands)
        try:
            write_seqrecs(seqs, out_fhand, out_format)
        # 'as' form instead of the Python-2-only comma form, so the module
        # also parses on Python 3.
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            if 'No suitable quality scores' in str(error):
                msg = 'No qualities available to write output file'
                raise IncompatibleFormatError(msg)
            raise
Beispiel #3
0
def seqio(in_fhands, out_fhand, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    in_fhands -- list of input file handles; the format of each one is
        obtained with get_format.
    out_fhand -- output file handle.
    out_format -- name of the format to write; it must be one of the
        SUPPORTED_OUTPUT_FORMATS setting.
    copy_if_same_format -- when the single input is already in out_format,
        copy its content (True) or create a relative symlink (False).

    It raises IncompatibleFormatError if out_format is not supported or if
    the writer complains that there are no suitable quality scores, and
    MalformedFile if error_quality_disagree is true for the ValueError
    raised while writing. Any other ValueError is propagated unchanged.
    '''
    if out_format not in get_setting('SUPPORTED_OUTPUT_FORMATS'):
        raise IncompatibleFormatError("This output format is not supported")

    in_formats = [get_format(fhand) for fhand in in_fhands]

    # Single input already in the requested format: no conversion needed.
    if len(in_fhands) == 1 and in_formats[0] == out_format:
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhand)
        else:
            rel_symlink(in_fhands[0].name, out_fhand.name)
    else:
        seqs = _read_seqrecords(in_fhands)
        try:
            write_seqrecs(seqs, out_fhand, out_format)
        # 'as' form instead of the Python-2-only comma form, so the module
        # also parses on Python 3.
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            if 'No suitable quality scores' in str(error):
                msg = 'No qualities available to write output file'
                raise IncompatibleFormatError(msg)
            raise
Beispiel #4
0
    def test_rel_symlink(self):
        'It tests various cases of rel symlinks'
        tempdir = TemporaryDir()
        try:
            # Case 1: link to a directory, link placed beside the target.
            hola = os.path.join(tempdir.name, 'hola')
            os.mkdir(hola)
            caracola = os.path.join(tempdir.name, 'caracola')
            rel_symlink(hola, caracola)
            assert os.path.exists(caracola)

            # Case 2: link to a regular file inside a subdirectory.
            fname = os.path.join(hola, 'fname')
            # Create the empty file and close the handle right away so it
            # is not leaked for the rest of the test.
            open(fname, 'w').close()
            caracola2 = os.path.join(tempdir.name, 'caracola2')
            rel_symlink(fname, caracola2)
            assert os.path.exists(caracola2)

            # Case 3: link placed two directory levels below the temp dir.
            path2 = os.path.join(tempdir.name, 'dir1', 'dir2')
            os.makedirs(path2)
            caracola3 = os.path.join(path2, 'caracola3')
            rel_symlink(hola, caracola3)
            assert os.path.exists(caracola3)
        finally:
            tempdir.close()
Beispiel #5
0
    def test_rel_symlink(self):
        'It tests various cases of rel symlinks'
        tempdir = TemporaryDir()
        try:
            # Case 1: link to a directory, link placed beside the target.
            hola = os.path.join(tempdir.name, 'hola')
            os.mkdir(hola)
            caracola = os.path.join(tempdir.name, 'caracola')
            rel_symlink(hola, caracola)
            assert os.path.exists(caracola)

            # Case 2: link to a regular file inside a subdirectory.
            fname = os.path.join(hola, 'fname')
            # Create the empty file and close the handle right away so it
            # is not leaked for the rest of the test.
            open(fname, 'w').close()
            caracola2 = os.path.join(tempdir.name, 'caracola2')
            rel_symlink(fname, caracola2)
            assert os.path.exists(caracola2)

            # Case 3: link placed two directory levels below the temp dir.
            path2 = os.path.join(tempdir.name, 'dir1', 'dir2')
            os.makedirs(path2)
            caracola3 = os.path.join(path2, 'caracola3')
            rel_symlink(hola, caracola3)
            assert os.path.exists(caracola3)
        finally:
            tempdir.close()
Beispiel #6
0
    def test_rel_symlink(self):
        "It tests various cases of rel symlinks"
        tempdir = TemporaryDir()
        try:
            # Case 1: link to a directory, link placed beside the target.
            hola = os.path.join(tempdir.name, "hola")
            os.mkdir(hola)
            caracola = os.path.join(tempdir.name, "caracola")
            rel_symlink(hola, caracola)
            assert os.path.exists(caracola)

            # Case 2: link to a regular file inside a subdirectory.
            fname = os.path.join(hola, "fname")
            # Create the empty file and close the handle right away so it
            # is not leaked for the rest of the test.
            open(fname, "w").close()
            caracola2 = os.path.join(tempdir.name, "caracola2")
            rel_symlink(fname, caracola2)
            assert os.path.exists(caracola2)

            # Case 3: link placed two directory levels below the temp dir.
            path2 = os.path.join(tempdir.name, "dir1", "dir2")
            os.makedirs(path2)
            caracola3 = os.path.join(path2, "caracola3")
            rel_symlink(hola, caracola3)
            assert os.path.exists(caracola3)
        finally:
            tempdir.close()
Beispiel #7
0
          in_formats == ['fasta', 'qual']):
        seq_records = SeqIO.QualityIO.PairedFastaQualIterator(in_fhands[0],
                                                              in_fhands[1])
        try:
            SeqIO.write(seq_records, out_fhands[0].name, out_format)
        except ValueError, error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
    elif (len(in_fhands) == 2 and len(out_fhands) == 2 and
          in_formats == ['fasta', 'qual'] and out_format == 'fasta'):
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhands[0])
            copyfileobj(in_fhands[1], out_fhands[1])
        else:
            rel_symlink(in_fhands[0].name, out_fhands[0].name)
            rel_symlink(in_fhands[1].name, out_fhands[1].name)
    else:
        raise RuntimeError('Please fixme, we should not be here')

    for out_fhand in out_fhands:
        out_fhand.flush()


def _count_seqs_in_fasta(fhand):
    'It counts the seqs in a fasta file'
    count = 0
    for line in fhand:
        if line[0] == '>':
            count += 1
    return count