def concatenate_vcf(infiles, outfile):
    '''
    Concatenate VCF files into a single output file.

    The header obtained from the first non-empty input is written once at
    the top of the output. For every later input the header is read again
    and compared with the first one; a warning is issued when they differ,
    and the later header is not written. Input files of size zero are
    skipped with a warning.

    :param infiles: input VCF files to be concatenated, either an iterable
        of paths or a dictionary of paths; a dictionary is concatenated in
        sorted-key order
    :param outfile: output VCF file
    '''
    if isinstance(infiles, dict):
        # iterate in sorted-key order so the output is deterministic
        infiles = [infiles[key] for key in sorted(infiles)]

    with helpers.GetFileHandle(outfile, 'w') as ofile:
        header = None

        for ifile in infiles:
            if os.path.getsize(ifile) == 0:
                warnings.warn('input file {} is empty'.format(ifile))
                continue

            with helpers.GetFileHandle(ifile) as f:
                if not header:
                    header = _get_header(f)
                    for line in header:
                        ofile.write(line)
                elif _get_header(f) != header:
                    warnings.warn(
                        'merging vcf files with mismatching headers')

                # copy whatever is left on the handle after the header call;
                # write() replaces the Python 2 only `print >> ofile, l,`
                # statement, which raises TypeError on Python 3
                for record in f:
                    ofile.write(record)
def write_csv_with_header(self, infile, headerless_input=True):
    '''
    Copy the contents of infile into self.filepath and write the yaml
    metadata afterwards.

    :param infile: path of the csv file to read from
    :param headerless_input: when true, self.header_line is written to the
        output before the contents of infile are copied
    '''
    # the output is opened first, then the input (same order as nested withs)
    with helpers.GetFileHandle(self.filepath, 'wt') as sink, \
            helpers.GetFileHandle(infile) as source:
        if headerless_input:
            sink.write(self.header_line)

        self.write_csv_data(source, sink)

    self.__write_yaml()
def write_headerless_csv(self, infile):
    '''
    Copy infile into self.filepath without its header line, then write the
    yaml metadata.

    The first line of infile is consumed and must be equal to
    self.header_line.

    :param infile: path of the csv file to read from
    :raises CsvWriterError: if the first line of infile differs from
        self.header_line
    '''
    with helpers.GetFileHandle(self.filepath, 'wt') as sink, \
            helpers.GetFileHandle(infile) as source:
        first_line = source.readline()
        if first_line != self.header_line:
            raise CsvWriterError("cannot write, wrong header")

        # the header has been consumed; only the remainder is passed on
        self.write_csv_data(source, sink)

    self.__write_yaml()
def concatenate_files(self, infiles):
    '''
    Write the contents of all files in infiles, in order, to self.filepath
    and then write the yaml metadata.

    When self.header is truthy, self.header_line is written once before any
    input is copied.

    :param infiles: iterable of paths of the files to concatenate
    '''
    header = None
    if self.header:
        header = self.header_line

    with helpers.GetFileHandle(self.filepath, 'wt') as sink:
        if header:
            sink.write(header)

        for path in infiles:
            with helpers.GetFileHandle(path) as source:
                self.write_csv_data(source, sink)

    self.__write_yaml()
def generate_metadata(self):
    '''
    Derive the metadata of the file at self.filepath from its first line.

    :return: tuple of (header, sep, dtypes, columns); header is always
        True, sep is the separator detected from the first line, dtypes is
        the result of __generate_dtypes for that separator and columns is
        the first line split on the separator
    '''
    with helpers.GetFileHandle(self.filepath) as handle:
        first_line = handle.readline().strip()
        sep = self.__detect_sep_from_header(first_line)
        columns = first_line.split(sep)
        dtypes = self.__generate_dtypes(sep=sep)

    # the first line is always treated as a header here
    return True, sep, dtypes, columns
def __write_yaml(self):
    '''
    Dump the header flag, the separator and the name/dtype of every column
    to self.yaml_file.
    '''
    metadata = {
        'header': self.header,
        'sep': self.sep,
        'columns': [
            {'name': name, 'dtype': self.dtypes[name]}
            for name in self.columns
        ],
    }

    with helpers.GetFileHandle(self.yaml_file, 'wt') as handle:
        yaml.safe_dump(metadata, handle, default_flow_style=False)
def __parse_metadata(self):
    '''
    Load the metadata stored in the yaml file at self.filepath + '.yaml'.

    :return: tuple of (header, sep, dtypes, columns); sep falls back to ','
        when the yaml data has no 'sep' entry, dtypes maps column name to
        dtype and columns lists the names in the order they are stored
    '''
    yaml_path = self.filepath + '.yaml'
    with helpers.GetFileHandle(yaml_path) as handle:
        metadata = yaml.safe_load(handle)

    header = metadata['header']
    sep = metadata.get('sep', ',')

    columns = []
    dtypes = {}
    for entry in metadata['columns']:
        name, dtype = entry['name'], entry['dtype']
        dtypes[name] = dtype
        columns.append(name)

    return header, sep, dtypes, columns
def __detect_sep_from_file(self):
    '''
    Detect the separator of the file at self.filepath from its first line.

    :return: result of __detect_sep_from_header for the stripped first line
    '''
    with helpers.GetFileHandle(self.filepath) as handle:
        first_line = handle.readline()

    return self.__detect_sep_from_header(first_line.strip())