def _processfile(self, path_or_file, *args, **kwds):
    """
    Push one input through ``self.process`` chunk by chunk and append the
    results to an output file.

    ``path_or_file`` is stored on ``self._file``; when it is a string it is
    replaced by whatever ``File.guess`` returns for it.  The output file is
    set up with ``createcsv(outfile, self.fields)`` and every chunk yielded
    by ``self._file.dfreader`` is appended to it.

    Keyword arguments read here (the complete ``kwds`` mapping is also
    forwarded to ``self.process``):
        testbadlines -- when truthy and ``self._file`` is a ``Csv``, run
                        ``Csv.locate_badlines`` and log a warning if it
                        reports at least one line.
        outdir       -- passed to ``newfolder``; defaults to 'processed'.
        outfile      -- output path; when missing or falsy,
                        ``self._file.get_outfile`` supplies one.

    Returns the result of ``self.evaluate()``.

    NOTE(review): the block structure below was reconstructed from a
    flattened source line -- confirm the nesting of the ``createcsv`` call
    and of the ``gc`` calls against the original file.
    """
    self.info("START")

    self._file = path_or_file
    if isinstance(path_or_file, (str, basestring)):
        # Only the keywords picked out by getkwds(kwds, pd.read_csv) are
        # handed to File.guess.
        guess_kwds = getkwds(kwds, pd.read_csv)
        self._file = File.guess(path_or_file, **guess_kwds)
    self.filename = self._file.basename()

    if kwds.get('testbadlines') and isinstance(self._file, Csv):
        self.info("Checking rows in '%s' for embedded delimiters." % self.filename)
        self.badlines = Csv.locate_badlines(
            self._file.path,
            delimiter=self._file.delimiter,
        )
        self.badlinescount = len(self.badlines)
        if self.badlinescount >= 1:
            badlines_msg = "%s bad lines have been found in '%s'." % (
                self.badlinescount, self.filename)
            self.warning(badlines_msg)

    # newfolder is always called, even when an explicit outfile is given.
    outdir = newfolder(kwds.get('outdir', 'processed'))
    outfile = kwds.get('outfile')
    if not outfile:
        outfile = self._file.get_outfile(self.filename, dirname=outdir)
    createcsv(outfile, self.fields)

    for chunk in self._file.dfreader:
        try:
            chunk = self.process(chunk, *args, **kwds)
            self.countsout += self.countvalues(chunk)
            self.normalized += len(chunk)
        except IncompleteExcelFile:
            # NOTE(review): the chunk is still appended below after this
            # error; if process() raised before rebinding, that is the
            # chunk exactly as the reader produced it -- confirm intended.
            self.incomplete_excel += 1

        File.append(outfile, chunk.to_csvstring(header=False))
        self.info("%s rows written to %s" % (len(chunk), outfile))

        # NOTE(review): gc.disable() switches automatic collection off and
        # nothing in this method switches it back on -- confirm intended.
        gc.disable()
        gc.collect()

    self.emptysheets = getattr(self._file, 'emptysheets', None)
    self.info("END")
    # Bare print statement (Python 2 syntax): writes an empty line.
    print
    return self.evaluate()