Ejemplo n.º 1
0
    def __init__(self,
                 ref_fpath,
                 out_fhand,
                 length=60,
                 vcf_fpath=None,
                 min_length=None):
        '''It sets up the writer and writes the header line in out_fhand.

        ref_fpath is indexed as a fasta file to get the reference sequences.
        length is the requested length and min_length the minimum one; when
        min_length is not given it takes the value of length.
        The vcf, if given, will be used to replace in the reference sequence
        the SNPs around the SNP of interest with IUPAC codes.

        It raises a ValueError if min_length is bigger than length.
        '''
        if min_length is None:
            min_length = length
        if min_length > length:
            raise ValueError(
                'Minimum length must be smaller than required length')

        # plain state, nothing here can fail
        self._sep = u'\t'
        self._len = length
        self._min_len = min_length
        self._prev_chrom = None
        self._out_fhand = out_fhand

        # the reference is indexed before the vcf is opened and the header is
        # written last, so nothing is written if any of the inputs fails
        self._ref_seqs = seq_index(ref_fpath, format='fasta')
        self._snvs = Reader(filename=vcf_fpath) if vcf_fpath else None

        out_fhand.write(u'CHROM\tPOS\tID\tseq\n')
Ejemplo n.º 2
0
 def __init__(self, vcf_path, samples=None):
     '''It indexes in memory the records of a vcf file by SNP name.

     Every record read from vcf_path is stored in self.index under the name
     that create_snp_name builds for it. If several records get the same
     name only the last one read is kept.
     samples is stored, as given, in self.samples.
     '''
     records = Reader(filename=vcf_path)
     self.samples = samples
     self.index = {create_snp_name(record): record for record in records}
Ejemplo n.º 3
0
    def __init__(self, ref_fpath, out_fhand, length=60, vcf_fpath=None,
                 min_length=None):
        '''It prepares the writer and writes the header in out_fhand.

        The reference sequences are taken from ref_fpath, that is indexed as
        a fasta file.
        min_length can not be bigger than length, otherwise a ValueError is
        raised. If it is not given, length is used as the minimum.
        The vcf will be used to replace in the reference sequence the SNPs
        around the SNP of interest with IUPAC codes.
        '''
        # the minimum defaults to the requested length
        if min_length is None:
            min_length = length
        if min_length > length:
            error_msg = 'Minimum length must be smaller than required length'
            raise ValueError(error_msg)

        self._len, self._min_len = length, min_length
        self._sep = u'\t'

        self._ref_seqs = seq_index(ref_fpath, format='fasta')

        # without a vcf there are no neighbouring SNPs to look for
        snvs = None
        if vcf_fpath:
            snvs = Reader(filename=vcf_fpath)
        self._snvs = snvs

        self._prev_chrom = None
        self._out_fhand = out_fhand
        self._out_fhand.write(u'CHROM\tPOS\tID\tseq\n')
Ejemplo n.º 4
0
class IlluminaWriter(object):
    '''It writes the SNPs in Illumina format

    For every SNP a tab-separated line is written with its CHROM, POS and ID
    followed by a sequence made of the reference segment located before the
    SNP, the SNP section and the reference segment located after it.

    ref_fpath should be in fasta format and the sequences obtained from it
    have to have a name attribute.
    NOTE(review): this docstring used to say that min_maf controls the SNPs
    reported in the adjacent segments as IUPAC codes, but this class takes no
    min_maf argument; confirm where that filter is applied.
    '''

    # TODO add extra error classes
    # TODO include the error classes inside this class to easy access
    class NotEnoughAdjacentSequenceError(Exception):
        '''Raised when a segment adjacent to the SNP is too short.'''
        pass

    def __init__(self,
                 ref_fpath,
                 out_fhand,
                 length=60,
                 vcf_fpath=None,
                 min_length=None):
        '''It inits.

        ref_fpath is indexed as a fasta file to get the reference sequences.
        out_fhand is the file in which the SNPs will be written; the header
        line is written here.
        length is the desired length for each of the segments adjacent to the
        SNP and min_length the minimum accepted one (length by default).
        The vcf will be used to replace in the reference sequence the SNPs
        around the SNP of interest with IUPAC codes

        It raises a ValueError if min_length is bigger than length.
        '''
        self._sep = u'\t'
        self._len = length
        if min_length is None:
            min_length = length
        if min_length > length:
            msg = 'Minimum length must be smaller than required length'
            raise ValueError(msg)
        self._min_len = min_length

        self._ref_seqs = seq_index(ref_fpath, format='fasta')

        if vcf_fpath:
            self._snvs = Reader(filename=vcf_fpath)
        else:
            self._snvs = None
        self._out_fhand = out_fhand
        out_fhand.write(u'CHROM\tPOS\tID\tseq\n')
        # last reference sequence used, kept to avoid looking it up again
        # when consecutive SNPs are located in the same chromosome
        self._prev_chrom = None

    @staticmethod
    def _upstream_start(snv_start, length):
        '''It returns the 0-based start of the segment before the SNP.

        The start is clamped to 0. A negative slice start would be counted
        from the end of the sequence, so for the SNPs closer than length to
        the chromosome start the segment would be empty or wrong.
        '''
        return max(0, snv_start - length)

    def write(self, snv):
        '''It writes one line with the SNP and its adjacent sequences.

        snv should have the CHROM, POS, ID, start and end attributes.

        It raises a NotEnoughAdjacentSequenceError if any of the two adjacent
        segments is shorter than the minimum length.
        '''
        chrom_name = snv.CHROM

        prev_chrom = self._prev_chrom
        if prev_chrom is None or prev_chrom.name != chrom_name:
            chrom = self._ref_seqs[chrom_name]
            self._prev_chrom = chrom
        else:
            chrom = prev_chrom

        length = self._len
        min_len = self._min_len

        snv_start = snv.start  # 0 based
        snv_end = snv.end  # 1 based
        # desired segment start, never before the chromosome start
        desired_start = self._upstream_start(snv_start, length)
        end = snv_end + length  # desired segment end
        chrom_seq = chrom.seq
        first_segment = unicode(chrom_seq[desired_start:snv_start])

        # NOTE(review): this is the segment located before the SNP, so the
        # 3' label (and the 5' one below) look swapped. The messages are kept
        # as they are because callers might be matching them; confirm.
        if len(first_segment) < min_len:
            msg = "Not enough sequence in 3'. ID: %s, POS: %d, CHROM: %s"
            msg %= (snv.ID, snv.POS, snv.CHROM)
            raise self.NotEnoughAdjacentSequenceError(msg)

        if self._snvs:
            # the segment could be shorter than the desired one
            real_start = snv_start - len(first_segment)
            close_snvs = self._snvs.fetch(chrom.name,
                                          start=real_start,
                                          end=snv_start)
            first_segment = _replace_snvs_with_iupac(first_segment,
                                                     close_snvs,
                                                     seq_offset=real_start)

        snv_segment = _build_snv_section(snv)
        # a slice end past the chromosome end just gives a shorter segment
        second_segment = unicode(chrom_seq[snv_end:end])
        if len(second_segment) < min_len:
            msg = "Not enough sequence in 5'. ID: %s, POS: %d, CHROM: %s"
            msg %= (snv.ID, snv.POS, snv.CHROM)
            raise self.NotEnoughAdjacentSequenceError(msg)

        if self._snvs:
            real_end = snv_end + len(second_segment)
            close_snvs = self._snvs.fetch(chrom.name,
                                          start=snv_end,
                                          end=real_end)
            second_segment = _replace_snvs_with_iupac(second_segment,
                                                      close_snvs,
                                                      seq_offset=snv_end)

        out_fhand = self._out_fhand
        sep = self._sep
        out_fhand.write(unicode(snv.CHROM))
        out_fhand.write(sep)
        out_fhand.write(unicode(snv.POS))
        out_fhand.write(sep)
        snp_id = snv.ID
        if snp_id is None:
            snp_id = u'.'
        # written as unicode, like CHROM and POS
        out_fhand.write(unicode(snp_id))
        out_fhand.write(sep)
        out_fhand.write(first_segment)
        out_fhand.write(snv_segment)
        out_fhand.write(second_segment)
        out_fhand.write(u'\n')

    def flush(self):
        '''It flushes the output file.'''
        self._out_fhand.flush()

    def close(self):
        '''It closes the output file.'''
        self._out_fhand.close()
Ejemplo n.º 5
0
class IlluminaWriter(object):
    '''It writes the SNPs in Illumina format

    One tab-separated line is written per SNP: CHROM, POS, ID and a sequence
    that joins the reference segment found before the SNP, the SNP section
    and the reference segment found after the SNP.

    ref_fpath should be in fasta format and the sequences read from it have
    to have a name attribute.
    NOTE(review): the previous docstring stated that min_maf controls the
    SNPs reported in the adjacent segments as IUPAC codes, but there is no
    min_maf argument in this class; confirm where that filter lives.
    '''

    # TODO add extra error classes
    # TODO include the error classes inside this class to easy access
    class NotEnoughAdjacentSequenceError(Exception):
        '''Raised when a segment adjacent to the SNP is too short.'''
        pass

    def __init__(self, ref_fpath, out_fhand, length=60, vcf_fpath=None,
                 min_length=None):
        '''It inits.

        The reference sequences are read from ref_fpath, indexed as fasta.
        The header line is written in out_fhand, the output file.
        length is the desired length of each segment adjacent to the SNP and
        min_length the shortest accepted one; it defaults to length.
        The vcf will be used to replace in the reference sequence the SNPs
        around the SNP of interest with IUPAC codes

        It raises a ValueError if min_length is bigger than length.
        '''
        self._sep = u'\t'
        self._len = length
        if min_length is None:
            min_length = length
        if min_length > length:
            msg = 'Minimum length must be smaller than required length'
            raise ValueError(msg)
        self._min_len = min_length

        self._ref_seqs = seq_index(ref_fpath, format='fasta')

        if vcf_fpath:
            self._snvs = Reader(filename=vcf_fpath)
        else:
            self._snvs = None
        self._out_fhand = out_fhand
        out_fhand.write(u'CHROM\tPOS\tID\tseq\n')
        # reference sequence used by the previous SNP, reused while the SNPs
        # keep coming from the same chromosome
        self._prev_chrom = None

    @staticmethod
    def _clamp_start(snv_start, length):
        '''It returns where the segment before the SNP starts (0-based).

        Negative values are replaced by 0 because a negative slice start is
        counted from the end of the sequence, and that would give an empty
        or wrong segment for the SNPs located near the chromosome start.
        '''
        return max(0, snv_start - length)

    def write(self, snv):
        '''It writes the line for the given SNP.

        The CHROM, POS, ID, start and end attributes of snv are used.

        A NotEnoughAdjacentSequenceError is raised when the segment before
        or after the SNP is shorter than the minimum length.
        '''
        chrom_name = snv.CHROM

        prev_chrom = self._prev_chrom
        if prev_chrom is None or prev_chrom.name != chrom_name:
            chrom = self._ref_seqs[chrom_name]
            self._prev_chrom = chrom
        else:
            chrom = prev_chrom

        length = self._len
        min_len = self._min_len

        snv_start = snv.start   # 0 based
        snv_end = snv.end       # 1 based
        # desired segment start, limited to the chromosome start
        desired_start = self._clamp_start(snv_start, length)
        end = snv_end + length      # desired segment end
        chrom_seq = chrom.seq
        first_segment = unicode(chrom_seq[desired_start:snv_start])

        # NOTE(review): 3' is reported for the segment before the SNP and 5'
        # for the one after it, which looks swapped. The messages are not
        # changed because some caller could depend on them; confirm.
        if len(first_segment) < min_len:
            msg = "Not enough sequence in 3'. ID: %s, POS: %d, CHROM: %s"
            msg %= (snv.ID, snv.POS, snv.CHROM)
            raise self.NotEnoughAdjacentSequenceError(msg)

        if self._snvs:
            # start of the segment that we have really got
            real_start = snv_start - len(first_segment)
            close_snvs = self._snvs.fetch(chrom.name, start=real_start,
                                          end=snv_start)
            first_segment = _replace_snvs_with_iupac(first_segment, close_snvs,
                                                     seq_offset=real_start)

        snv_segment = _build_snv_section(snv)
        # slicing past the chromosome end just returns a shorter segment
        second_segment = unicode(chrom_seq[snv_end:end])
        if len(second_segment) < min_len:
            msg = "Not enough sequence in 5'. ID: %s, POS: %d, CHROM: %s"
            msg %= (snv.ID, snv.POS, snv.CHROM)
            raise self.NotEnoughAdjacentSequenceError(msg)

        if self._snvs:
            real_end = snv_end + len(second_segment)
            close_snvs = self._snvs.fetch(chrom.name, start=snv_end,
                                          end=real_end)
            second_segment = _replace_snvs_with_iupac(second_segment,
                                                      close_snvs,
                                                      seq_offset=snv_end)

        out_fhand = self._out_fhand
        sep = self._sep
        out_fhand.write(unicode(snv.CHROM))
        out_fhand.write(sep)
        out_fhand.write(unicode(snv.POS))
        out_fhand.write(sep)
        snp_id = snv.ID
        if snp_id is None:
            snp_id = u'.'
        # converted to unicode as it is done with CHROM and POS
        out_fhand.write(unicode(snp_id))
        out_fhand.write(sep)
        out_fhand.write(first_segment)
        out_fhand.write(snv_segment)
        out_fhand.write(second_segment)
        out_fhand.write(u'\n')

    def flush(self):
        '''It flushes the output file.'''
        self._out_fhand.flush()

    def close(self):
        '''It closes the output file.'''
        self._out_fhand.close()