コード例 #1
0
    def __init__(self, output, header_str):
        """
        Store the output target, build a formatting-only writer, print the header.

        Args:
          output: object passed as ``file=`` to ``print``; the header is
            printed to it here.
          header_str: complete VCF header text. It is printed with ``end=""``,
            so no newline is appended to it.
        """
        self.output = output
        self.header_str = header_str

        # create a cyvcf2 file for formatting, not for writing the file
        # The temporary file is kept on the instance on purpose: a
        # NamedTemporaryFile is closed (and deleted) once it is garbage
        # collected, which for a plain local would happen as soon as
        # __init__ returns, while the writer opened on it is still in use.
        self._tmp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".vcf")
        self.vcf = Writer.from_string(self._tmp_file, self.header_str)

        # print the header
        print(self.header_str, end="", file=self.output)
コード例 #2
0
    def to_vcf(self, path):
        """
        Write every variant of this collection to a minimal VCF file.

        Only CHROM, POS, REF and ALT are filled in; ID, QUAL, FILTER and INFO
        are written as '.'.

        Args:
          path: destination handed to ``cyvcf2.Writer.from_string``.
        """
        from cyvcf2 import Writer

        header = (
            '##fileformat=VCFv4.2\n'
            '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n'
        )
        writer = Writer.from_string(path, header)

        # Close the writer explicitly (even if a record fails) instead of
        # relying on garbage collection to flush and release the file.
        try:
            for v in self:
                record = '\t'.join(
                    [v.chrom, str(v.pos), '.', v.ref, v.alt, '.', '.', '.'])
                writer.write_record(writer.variant_from_string(record))
        finally:
            writer.close()
コード例 #3
0
def test_writer_from_string():
    """Build a Writer from a header string, then write the header and one record to out.vcf."""
    header_text = (
        '##fileformat=VCFv4.1\n'
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n'
        '##contig=<ID=chr2,length=249250621,assembly=hg19>\n'
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsamplea\n'
    )

    writer = Writer.from_string("out.vcf", header_text)
    writer.write_header()

    # The record is on chr1 although the header above only declares chr2.
    record_fields = ["chr1", "234", ".", "A", "C", "40", "PASS", ".", "GT", "0/0"]
    record = writer.variant_from_string("\t".join(record_fields))
    writer.write_record(record)

    writer.close()
コード例 #4
0
ファイル: vcf_query.py プロジェクト: Hoeze/kipoiseq
    def to_vcf(self, path, remove_samples=False, clean_info=False):
        """
        Write the query result to a VCF file.

        The header is taken from ``self.vcf.raw_header``; each item yielded by
        iterating ``self`` contributes its ``source`` record.

        Args:
          path: path of the file.
          remove_samples: drop everything after the 8th column (FORMAT and
            the sample columns) from the header and from every record.
          clean_info: replace the INFO field of every record with '.'.
        """
        from cyvcf2 import Writer

        header = self.vcf.raw_header

        if remove_samples:
            # Truncate the column line to the 8 fixed VCF columns
            # (CHROM..INFO). The line is located by its '#CHROM' prefix so
            # the result does not depend on how the raw header is terminated.
            header = '\n'.join(
                '\t'.join(line.strip().split('\t')[:8])
                if line.startswith('#CHROM') else line
                for line in header.split('\n')
            )

        writer = Writer.from_string(path, header)

        # Close the writer explicitly (even on error) so the output is
        # flushed deterministically rather than at garbage collection.
        try:
            for v in self:
                variant = v.source

                if remove_samples or clean_info:
                    # VCF records are tab-delimited; splitting on tabs only
                    # keeps fields that contain spaces intact.
                    fields = str(variant).strip().split('\t')

                    if remove_samples:
                        # all the columns after the 8th are FORMAT/samples
                        fields = fields[:8]

                    if clean_info:
                        # the 8th column (index 7) holds the INFO fields;
                        # replace it with the missing-value marker
                        fields[7] = '.'

                    variant = writer.variant_from_string('\t'.join(fields))

                writer.write_record(variant)
        finally:
            writer.close()
コード例 #5
0
def main():
    """
    Command-line entry point: convert a bed-like file with alleles to a VCF.

    Parses ``--bed``, ``--fai`` and ``--output``, builds the header with
    ``build_header(args.fai)``, opens a writer on the output and passes it,
    together with the bed path, to ``bed_to_vcf``.
    """
    parser = argparse.ArgumentParser(description="""
    Utility to convert bed-like file with alleles to a simple VCF
    """)

    parser.add_argument('-b',
                        '--bed',
                        help="""
                      bed file to convert to vcf
                      must have chrom, start, end, ref, and alt columns
                      """,
                        required=True)
    parser.add_argument('-f',
                        '--fai',
                        help="""
                      fasta index file generated by samtools faidx
                      """,
                        required=True)
    # The help text used to start with a stray ',' (typo after the opening
    # triple quote), which was displayed in --help.
    parser.add_argument('-o',
                        '--output',
                        help="""
                      output file name,
                      prints to standard out if not supplied
                      """)

    args = parser.parse_args()

    # - is accepted as standard out for cyvcf2
    output = "-" if args.output is None else args.output

    hdr = build_header(args.fai)

    w = Writer.from_string(output, hdr)

    # Always close the writer, even when the conversion fails part-way.
    try:
        bed_to_vcf(args.bed, w)
    finally:
        w.close()
コード例 #6
0
ファイル: haplotypeVCF.py プロジェクト: sialsaffar/RiceThesis
# Getting the vcf header
rawheader = vcf.raw_header
# Column-line template; the single %s is replaced by the sample name.
newline = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s"

# Keep only the "##" meta-information lines of the input header; its own
# column line is replaced by the template above.
metalines = [line for line in rawheader.split('\n') if line.startswith('##')]

headerlist = metalines + [newline]
header = '\n'.join(headerlist)

# The sample name is substituted on the column line only. Applying % to the
# whole header would treat any '%' inside a meta line (e.g. in a
# Description) as a format specifier and fail or corrupt the header.
# NOTE(review): the file names end in .vcf.gz but no write mode is passed to
# from_string -- confirm the output is compressed as intended.
w_a = Writer.from_string(''.join([sample, '_a.vcf.gz']),
                         '\n'.join(metalines + [newline % sample_a]))
w_a.write_header()

w_b = Writer.from_string(''.join([sample, '_b.vcf.gz']),
                         '\n'.join(metalines + [newline % sample_b]))
w_b.write_header()

for v in vcf:
    # class to get genotype in 0/0, 0/1, ... etc. format.
    gts = v.genotypes

    class Genotype(object):
        __slots__ = ('alleles', 'phased')

        def __init__(self, li):
            self.alleles = li[:-1]
            self.phased = li[-1]