Exemple #1
0
    def execute(self):
        """Parse every input VCF, then write the header and the merged lines.

        For each name in ``self.vcf_file_names`` the file is opened (through
        ``gzip`` when the name ends in ``.gz``) and handed to
        ``l_bp.parse_vcf`` together with ``self.vcf_lines`` and
        ``self.vcf_headers``.  One ``##SAMPLE`` header line is appended per
        sample returned, and ``self.has_genotypes`` is set when at least one
        sample is seen.

        Once the number of files parsed since the last spill exceeds
        ``self.batchsize``, ``self.vcf_lines`` is sorted and
        ``self.write_temp_file()`` is called.  After the loop the header is
        written, the remaining in-memory lines are sorted, and everything in
        ``self.temp_files`` plus those lines is passed through ``merge`` into
        ``self.output_handle``.
        """
        counter = 0
        for vcf_file_name in self.vcf_file_names:
            # TODO This is very similar to what we do in vcfpaste
            # Should abstract out in both cases so there's less repeated code
            if vcf_file_name.endswith('.gz'):
                # NOTE(review): 'rb' yields bytes under Python 3 while the
                # plain-text branch yields str -- confirm what parse_vcf
                # expects before changing the mode.
                input_stream = gzip.open(vcf_file_name, 'rb')
            else:
                input_stream = open(vcf_file_name, 'r')

            # Close the handle even if parsing fails; previously one
            # descriptor was leaked per input file.  The samples loop stays
            # inside the try block in case parse_vcf reads lazily.
            try:
                samples = l_bp.parse_vcf(input_stream,
                                         self.vcf_lines,
                                         self.vcf_headers,
                                         include_ref=self.include_ref)
                for sample in samples:
                    self.vcf_headers.append("##SAMPLE=<ID=" + sample + ">\n")
                    self.has_genotypes = True
            finally:
                input_stream.close()

            counter += 1
            if counter > self.batchsize:
                # Spill the current batch in sorted order and start counting
                # the next one.
                self.vcf_lines.sort(key=l_bp.vcf_line_key)
                self.write_temp_file()
                counter = 0
        # no need to write the final batch to file
        # FIXME Replace this with a new VCF class with the headers all added
        self.write_header()

        self.vcf_lines.sort(key=l_bp.vcf_line_key)
        # The in-memory lines are merged as one more sorted iterable next to
        # the spilled temp files.
        iterables = self.temp_files + [self.vcf_lines]
        self.output_handle.writelines(merge(*iterables))
        self.close_tempfiles()
Exemple #2
0
    def execute(self):
        """Parse every input VCF, then write the header and the merged lines.

        Each name in ``self.vcf_file_names`` is wrapped in an ``InputStream``
        (constructed with ``self.tempdir``) and handed to ``l_bp.parse_vcf``
        together with ``self.vcf_lines`` and ``self.vcf_headers``.  A
        ``##SAMPLE`` header line is appended the first time a sample name is
        seen; the name ``'VARIOUS'`` never gets a header line.
        ``self.has_genotypes`` is set for every sample returned, including
        repeats and ``'VARIOUS'``.

        Once the number of files parsed since the last spill exceeds
        ``self.batchsize``, ``self.vcf_lines`` is sorted and
        ``self.write_temp_file()`` is called.  After the loop the header is
        written, the remaining in-memory lines are sorted, and everything in
        ``self.temp_files`` plus those lines is passed through ``merge`` into
        ``self.output_handle``.
        """
        counter = 0
        # A set gives constant-time duplicate checks; header order is still
        # first-seen order because lines are appended as names are added.
        seen_samples = set()
        for vcf_file_name in self.vcf_file_names:
            # NOTE(review): the stream is never explicitly released here --
            # confirm whether InputStream needs a close() call.
            input_stream = InputStream(vcf_file_name, self.tempdir)
            samples = l_bp.parse_vcf(input_stream,
                                     self.vcf_lines,
                                     self.vcf_headers,
                                     include_ref=self.include_ref)
            for sample in samples:
                # Set unconditionally: both branches of the old if/else did
                # exactly this.
                self.has_genotypes = True
                if sample == 'VARIOUS' or sample in seen_samples:
                    continue
                self.vcf_headers.append("##SAMPLE=<ID=" + sample + ">\n")
                seen_samples.add(sample)
            counter += 1
            if counter > self.batchsize:
                # Spill the current batch in sorted order and start counting
                # the next one.
                self.vcf_lines.sort(key=l_bp.vcf_line_key)
                self.write_temp_file()
                counter = 0
        # no need to write the final batch to file
        # FIXME Replace this with a new VCF class with the headers all added
        self.write_header()

        self.vcf_lines.sort(key=l_bp.vcf_line_key)
        # The in-memory lines are merged as one more sorted iterable next to
        # the spilled temp files.
        iterables = self.temp_files + [self.vcf_lines]
        self.output_handle.writelines(merge(*iterables))
        self.close_tempfiles()
Exemple #3
0
    def execute(self):
        """Read all input VCFs and emit the header followed by merged lines.

        Every entry of ``self.vcf_file_names`` is opened -- with ``gzip`` if
        its name ends in ``.gz``, otherwise as a plain file -- and passed to
        ``l_bp.parse_vcf`` along with ``self.vcf_lines`` and
        ``self.vcf_headers``.  Each returned sample adds a ``##SAMPLE``
        header line and sets ``self.has_genotypes``.

        When more than ``self.batchsize`` files have been parsed since the
        last spill, ``self.vcf_lines`` is sorted and
        ``self.write_temp_file()`` is called.  Finally the header is written
        and ``merge`` combines ``self.temp_files`` with the sorted in-memory
        lines into ``self.output_handle`` before ``self.close_tempfiles()``
        runs.
        """
        counter = 0
        for vcf_file_name in self.vcf_file_names:
            # TODO This is very similar to what we do in vcfpaste
            # Should abstract out in both cases so there's less repeated code
            if vcf_file_name.endswith('.gz'):
                # NOTE(review): binary mode returns bytes on Python 3, unlike
                # the text-mode branch below -- verify against parse_vcf.
                input_stream = gzip.open(vcf_file_name, 'rb')
            else:
                input_stream = open(vcf_file_name, 'r')

            # Guarantee the handle is released, whether parsing succeeds or
            # raises; the original never closed it.  Iteration over the
            # samples happens before the close in case parse_vcf is lazy.
            try:
                samples = l_bp.parse_vcf(input_stream,
                                         self.vcf_lines,
                                         self.vcf_headers,
                                         include_ref=self.include_ref)
                for sample in samples:
                    self.vcf_headers.append("##SAMPLE=<ID=" + sample + ">\n")
                    self.has_genotypes = True
            finally:
                input_stream.close()

            counter += 1
            if counter > self.batchsize:
                # Batch limit exceeded: sort what is buffered, spill it, and
                # restart the per-batch count.
                self.vcf_lines.sort(key=l_bp.vcf_line_key)
                self.write_temp_file()
                counter = 0
        # no need to write the final batch to file
        # FIXME Replace this with a new VCF class with the headers all added
        self.write_header()

        self.vcf_lines.sort(key=l_bp.vcf_line_key)
        # Lines still in memory join the spilled temp files as one more
        # sorted input to the merge.
        iterables = self.temp_files + [self.vcf_lines]
        self.output_handle.writelines(merge(*iterables))
        self.close_tempfiles()
Exemple #4
0
    def execute(self):
        """Parse each input VCF and print the header plus merged lines.

        Every name in ``self.vcf_file_names`` is passed to
        ``l_bp.parse_vcf`` with ``self.vcf_lines`` and ``self.vcf_headers``;
        a ``##SAMPLE`` header line is appended for each sample it returns.
        When more than ``self.batchsize`` files have been parsed since the
        last spill, the buffered lines are sorted and
        ``self.write_temp_file()`` is called.  At the end the header is
        written and the merge of ``self.temp_files`` with the sorted
        in-memory lines goes to ``sys.stdout``.
        """
        files_in_batch = 0
        for path in self.vcf_file_names:
            sample_names = l_bp.parse_vcf(path, self.vcf_lines, self.vcf_headers)
            for name in sample_names:
                header_line = "##SAMPLE=<ID=" + name + ">\n"
                self.vcf_headers.append(header_line)

            files_in_batch += 1
            if files_in_batch <= self.batchsize:
                continue

            # Batch limit exceeded: sort, spill, and restart the count.
            self.vcf_lines.sort(key=l_bp.vcf_line_key)
            self.write_temp_file()
            files_in_batch = 0

        # The last (partial) batch stays in memory; it is merged directly
        # below instead of being written to a temp file first.
        self.write_header()

        self.vcf_lines.sort(key=l_bp.vcf_line_key)
        sorted_sources = self.temp_files + [self.vcf_lines]
        merged_lines = merge(*sorted_sources)
        sys.stdout.writelines(merged_lines)
Exemple #5
0
    def execute(self):
        """Parse each input VCF and write the header plus merged lines.

        Every name in ``self.vcf_file_names`` is passed to
        ``l_bp.parse_vcf`` with ``self.vcf_lines`` and ``self.vcf_headers``;
        each sample it returns adds one ``##SAMPLE`` header line.  When more
        than ``self.batchsize`` files have been parsed since the last spill,
        the buffered lines are sorted and ``self.write_temp_file()`` is
        called.  Afterwards the header is written, ``merge`` combines
        ``self.temp_files`` with the sorted in-memory lines into
        ``self.output_handle``, and ``self.close_tempfiles()`` is called.
        """
        parsed_since_spill = 0
        sort_key = l_bp.vcf_line_key

        for source in self.vcf_file_names:
            for sample_id in l_bp.parse_vcf(source, self.vcf_lines, self.vcf_headers):
                self.vcf_headers.append("##SAMPLE=<ID=" + sample_id + ">\n")

            parsed_since_spill += 1
            if not parsed_since_spill > self.batchsize:
                continue

            # Over the batch limit: flush the sorted buffer to a temp file.
            self.vcf_lines.sort(key=sort_key)
            self.write_temp_file()
            parsed_since_spill = 0

        # Whatever is still buffered is not spilled; it takes part in the
        # merge straight from memory.
        self.write_header()

        self.vcf_lines.sort(key=sort_key)
        merged = merge(*(self.temp_files + [self.vcf_lines]))
        self.output_handle.writelines(merged)
        self.close_tempfiles()