Esempio n. 1
0
def concatenate_vcf(infiles, outfile):
    '''
    Concatenate VCF files into a single output file.

    The header returned by ``_get_header`` for the first non-empty input is
    written once at the top of the output. For every input, the lines that
    remain on the handle after ``_get_header`` has run are appended verbatim
    (``_get_header`` is presumed to consume the header lines -- confirm
    against its definition). Empty inputs are skipped with a warning, and a
    warning is issued when a later input's header differs from the first.

    :param infiles: input VCF files to be concatenated; either an iterable
        of paths, or a dictionary of paths, in which case the files are
        concatenated in sorted key order
    :param outfile: output VCF file
    '''
    if isinstance(infiles, dict):
        infiles = [infiles[key] for key in sorted(infiles)]

    with helpers.GetFileHandle(outfile, 'w') as ofile:
        header = None

        for ifile in infiles:

            if os.path.getsize(ifile) == 0:
                warnings.warn('input file {} is empty'.format(ifile))
                continue

            with helpers.GetFileHandle(ifile) as f:

                if not header:
                    header = _get_header(f)

                    for line in header:
                        ofile.write(line)
                else:
                    if not _get_header(f) == header:
                        warnings.warn(
                            'merging vcf files with mismatching headers')

                # Lines read from the handle keep their own line ending, so
                # they are written as-is. This replaces the Python 2-only
                # ``print >> ofile, l,`` form, which fails under Python 3.
                for line in f:
                    ofile.write(line)
Esempio n. 2
0
    def write_csv_with_header(self, infile, headerless_input=True):
        '''
        Write the contents of ``infile`` to ``self.filepath``.

        After the data has been written, ``self.__write_yaml()`` is called.

        :param infile: path of the input file handed to ``write_csv_data``
        :param headerless_input: when true, ``self.header_line`` is written
            to the output before the data read from ``infile``
        '''
        with helpers.GetFileHandle(self.filepath, 'wt') as out_handle, \
                helpers.GetFileHandle(infile) as in_handle:
            if headerless_input:
                out_handle.write(self.header_line)
            self.write_csv_data(in_handle, out_handle)

        self.__write_yaml()
Esempio n. 3
0
    def write_headerless_csv(self, infile):
        '''
        Write the contents of ``infile``, minus its first line, to
        ``self.filepath``.

        The first line of ``infile`` is read and must equal
        ``self.header_line``; the reader, now positioned after that line, is
        then handed to ``write_csv_data``. Afterwards ``self.__write_yaml()``
        is called.

        The header is validated before the output handle is opened, so a
        rejected input does not leave behind an output that was opened for
        writing (and presumably truncated -- ``GetFileHandle`` is assumed to
        behave like ``open`` here).

        :param infile: path of the input file; its first line must be the
            expected header
        :raises CsvWriterError: if the first line of ``infile`` differs from
            ``self.header_line``
        '''
        with helpers.GetFileHandle(infile) as reader:
            if reader.readline() != self.header_line:
                raise CsvWriterError("cannot write, wrong header")

            # Only touch the destination once the input is known to be valid.
            with helpers.GetFileHandle(self.filepath, 'wt') as writer:
                self.write_csv_data(reader, writer)

        self.__write_yaml()
Esempio n. 4
0
    def concatenate_files(self, infiles):
        '''
        Write the data of every file in ``infiles`` to ``self.filepath``.

        When ``self.header`` is truthy, ``self.header_line`` is written once
        before any input data. Each input is passed to ``write_csv_data`` in
        the order given, and ``self.__write_yaml()`` is called at the end.

        :param infiles: iterable of input file paths
        '''
        header_text = None
        if self.header:
            header_text = self.header_line

        with helpers.GetFileHandle(self.filepath, 'wt') as out_handle:
            if header_text:
                out_handle.write(header_text)

            for path in infiles:
                with helpers.GetFileHandle(path) as in_handle:
                    self.write_csv_data(in_handle, out_handle)

        self.__write_yaml()
Esempio n. 5
0
 def generate_metadata(self):
     '''
     Build metadata from the first line of the file at ``self.filepath``.

     The first line is stripped, its separator is detected with
     ``__detect_sep_from_header`` and it is split on that separator to get
     the column names. Dtypes come from ``__generate_dtypes``.

     :return: tuple ``(header, sep, dtypes, columns)``; ``header`` is
         always ``True``
     '''
     with helpers.GetFileHandle(self.filepath) as inputfile:
         first_line = inputfile.readline().strip()
         sep = self.__detect_sep_from_header(first_line)
         columns = first_line.split(sep)
         dtypes = self.__generate_dtypes(sep=sep)
         return True, sep, dtypes, columns
Esempio n. 6
0
    def __write_yaml(self):
        '''
        Dump the header flag, separator and column dtypes to
        ``self.yaml_file``.

        The yaml document has the keys ``header``, ``sep`` and ``columns``;
        ``columns`` holds one ``{'name': ..., 'dtype': ...}`` entry per item
        of ``self.columns``, with the dtype looked up in ``self.dtypes``.
        '''
        yamldata = {'header': self.header, 'sep': self.sep}
        yamldata['columns'] = [
            {'name': colname, 'dtype': self.dtypes[colname]}
            for colname in self.columns
        ]

        with helpers.GetFileHandle(self.yaml_file, 'wt') as yaml_handle:
            yaml.safe_dump(yamldata, yaml_handle, default_flow_style=False)
Esempio n. 7
0
    def __parse_metadata(self):
        '''
        Load metadata from the yaml file at ``self.filepath + '.yaml'``.

        :return: tuple ``(header, sep, dtypes, columns)`` where ``dtypes``
            maps column name to dtype and ``columns`` lists the column names
            in file order; ``sep`` falls back to ``','`` when the yaml has
            no ``sep`` entry
        '''
        yaml_path = self.filepath + '.yaml'
        with helpers.GetFileHandle(yaml_path) as yamlfile:
            metadata = yaml.safe_load(yamlfile)

        header = metadata['header']
        sep = metadata.get('sep', ',')

        # One pass over the entries, reading 'name' then 'dtype' for each.
        name_dtype_pairs = [
            (entry['name'], entry['dtype']) for entry in metadata['columns']
        ]
        columns = [name for name, _ in name_dtype_pairs]
        dtypes = dict(name_dtype_pairs)

        return header, sep, dtypes, columns
Esempio n. 8
0
 def __detect_sep_from_file(self):
     '''
     Detect the separator from the first line of ``self.filepath``.

     :return: whatever ``__detect_sep_from_header`` returns for the
         stripped first line
     '''
     with helpers.GetFileHandle(self.filepath) as handle:
         first_line = handle.readline().strip()
     return self.__detect_sep_from_header(first_line)