Esempio n. 1
0
    def write(self, data_df, version_line=True):
        '''Write the given data to a GFF3 file, using the converter if given.

        Generates an empty file if given an empty DataFrame.

        Args:
            data_df (DataFrame): The data to write. If `self.converter` is
                set, the data is passed through `self.convert` first;
                otherwise `self.mangle_coordinates` is applied to it.
            version_line (bool): If true, write the GFF3 version line at the
                top of the file. Note that this will cause an existing file
                to be overwritten, but the line will only be added in the
                first call to `write`.

        Raises:
            ValueError: If `self.filename` is None.
        '''

        if self.filename is None:
            raise ValueError('Trying to write to filename None! Give GFF3Writer'
                             ' a filename.')

        if len(data_df) == 0:
            warn_empty('Writing out an empty GFF3 file to {0}'.format(self.filename))
            touch(self.filename)
            return

        # Only the first non-empty write may truncate the file and emit the
        # version line; every other call appends. The flag is tested for
        # truthiness rather than `is True` so that bool-like values (e.g.
        # numpy.bool_) are honoured instead of being silently ignored.
        write_header = not self.created and bool(version_line)

        with open(self.filename, 'w' if write_header else 'a') as fp:
            if write_header:
                fp.write(self.version_line + '\n')
            self.created = True

            if self.converter is not None:
                data_df = self.convert(data_df)
            else:
                # The return value is not used, so this presumably adjusts
                # data_df in place -- TODO confirm.
                self.mangle_coordinates(data_df)

            # Tab-separated, no header/index, '.' for missing values, floats
            # in scientific notation; column order comes from GFF3Parser.
            data_df.to_csv(fp, sep='\t', na_rep='.',
                           columns=[k for k, _ in GFF3Parser.columns],
                           index=False, header=False, quoting=csv.QUOTE_NONE,
                           float_format='%.6e')
Esempio n. 2
0
 def cmd():
     '''Write the groups parsed from `input_filename` to a GFF3 file.

     Every group yielded by `InfernalParser(input_filename)` is handed to a
     `GFF3Writer` for `output_filename` that uses the `cmscan_to_gff3`
     converter. If `EmptyFile` is raised while doing so,
     `touch(output_filename)` is called instead.
     '''
     writer = GFF3Writer(output_filename,
                         converter=cmscan_to_gff3,
                         database=database)
     try:
         for group in InfernalParser(input_filename):
             writer.write(group)
     except EmptyFile:
         # Presumably ensures an (empty) output file still exists when the
         # input has no records -- confirm against `touch`.
         touch(output_filename)
Esempio n. 3
0
 def cmd():
     '''Write the CSV data in `input_filename` to a GFF3 file.

     The CSV is read in chunks of 10000 rows and every chunk is handed to a
     `GFF3Writer` for `output_filename` that uses the `hmmscan_to_gff3`
     converter. If `EmptyFile` is raised while doing so,
     `touch(output_filename)` is called instead.
     '''
     writer = GFF3Writer(output_filename,
                         converter=hmmscan_to_gff3,
                         database=database)
     try:
         # NOTE(review): pandas reports a completely empty input as
         # pandas.errors.EmptyDataError; confirm whether EmptyFile covers
         # that case, otherwise it propagates to the caller.
         for group in pd.read_csv(input_filename, chunksize=10000):
             writer.write(group)
     except EmptyFile:
         # Presumably ensures an (empty) output file still exists when the
         # input has no records -- confirm against `touch`.
         touch(output_filename)
Esempio n. 4
0
    def cmd():
        '''Write the CSV data in `input_filename` to a GFF3 file.

        The CSV reader is created first (10000 rows per chunk), then every
        chunk is handed to a `GFF3Writer` for `output_filename` that uses the
        `shmlast_to_gff3` converter. If `EmptyFile` is raised during the
        loop, `touch(output_filename)` is called instead.
        '''
        # NOTE(review): the reader is created outside the try block, so any
        # error raised while opening the input is not handled here.
        chunks = pd.read_csv(input_filename, chunksize=10000)
        gff3_writer = GFF3Writer(
            output_filename,
            converter=shmlast_to_gff3,
            database=database,
        )

        try:
            for chunk in chunks:
                gff3_writer.write(chunk)
        except EmptyFile:
            # Presumably ensures an (empty) output file still exists --
            # confirm against `touch`.
            touch(output_filename)
Esempio n. 5
0
 def cmd():
     '''Write the alignments in `input_filename` to a GFF3 file.

     Input whose name ends in `.csv` or `.tsv` is read with `pd.read_csv` in
     chunks of 10000 rows; anything else is read with `MafParser`. Every
     group is handed to a `GFF3Writer` for `output_filename` that uses the
     `maf_to_gff3` converter. If `EmptyFile` is raised during the loop,
     `touch(output_filename)` is called instead.
     '''
     # NOTE(review): `.tsv` input is read with read_csv's default separator
     # (a comma) -- confirm that is what the upstream files actually use.
     is_tabular = input_filename.endswith(('.csv', '.tsv'))
     groups = (pd.read_csv(input_filename, chunksize=10000)
               if is_tabular
               else MafParser(input_filename))
     gff3_writer = GFF3Writer(
         output_filename,
         converter=maf_to_gff3,
         database=database,
     )
     try:
         for group in groups:
             gff3_writer.write(group)
     except EmptyFile:
         # Presumably ensures an (empty) output file still exists --
         # confirm against `touch`.
         touch(output_filename)