def preprocessing(fileobj: gzip.GzipFile) -> None:
    """Strip the header row and surplus columns from an mtx-directory TSV.

    The gzipped TSV is parsed with pandas, reduced to the columns listed
    below and written back out uncompressed, tab-separated, with neither a
    header row nor an index column, so that the result can be used with
    ScanPy methods.

    Recognised inputs (matched exactly against ``fileobj.name``):

    * ``genes.tsv.gz``: keeps ``featurekey`` and ``featurename`` and
      writes ``genes.tsv``.
    * ``cells.tsv.gz``: keeps ``cellkey``, renames ``fileobj.name`` to
      ``barcodes.tsv.gz`` and writes ``barcodes.tsv``.

    Because the name must be a bare file name, the output path is relative
    and therefore resolved against the current working directory.

    Args:
        fileobj: Open gzip stream whose ``name`` is one of the recognised
            file names. Its ``name`` attribute is reassigned for the cells
            file (side effect kept for callers that read it afterwards).

    Raises:
        ValueError: If ``fileobj.name`` is not a recognised file name, or
            if the table lacks one of the required columns.
    """
    # Check the name before parsing so that an unexpected file is rejected
    # without reading and decompressing it first.
    if fileobj.name == 'genes.tsv.gz':
        archive_name = 'genes.tsv.gz'
        col_to_keep = ['featurekey', 'featurename']
    elif fileobj.name == 'cells.tsv.gz':
        # NOTE(review): presumably 'barcodes' is the file name the ScanPy
        # reader looks for -- confirm against the caller.
        archive_name = 'barcodes.tsv.gz'
        col_to_keep = 'cellkey'
    else:
        raise ValueError(
            'Expected genes.tsv.gz and cells.tsv.gz in directory.')

    table = pd.read_table(fileobj, sep='\t')

    # Explicit validation instead of `assert`, which is removed under -O.
    required = [col_to_keep] if isinstance(col_to_keep, str) else col_to_keep
    missing = [column for column in required if column not in table.columns]
    if missing:
        raise ValueError(
            'Missing expected column(s) {} in {}.'.format(
                ', '.join(missing), fileobj.name))

    # Rename only after validation so a failed call leaves `fileobj` as is.
    if fileobj.name != archive_name:
        fileobj.name = archive_name

    # Drop the trailing '.gz' to get the uncompressed output file name.
    output_path = os.path.splitext(archive_name)[0]

    # Write to file without column or row headers.
    table[col_to_keep].to_csv(output_path,
                              index=False,
                              header=False,
                              sep='\t')