def concatenateLookups(filepath, outfname, pattern=None, fnames=None):
    '''
    @summary: Concatenate named files or files matching pattern into a
              single file.
    @param filepath: Pathname to input files
    @param outfname: Basename of output file
    @param pattern: Pattern to match for input files
    @param fnames: Basename of one or more input file
    '''
    outfname = os.path.join(filepath, outfname)
    # Collect all input filenames: glob matches first, then explicit names
    infnames = []
    if pattern is not None:
        infnames = glob.glob(os.path.join(filepath, pattern))
    if fnames is not None:
        for fn in fnames:
            infnames.append(os.path.join(filepath, fn))
    csvreader = None
    outf = None
    try:
        csvwriter, outf = getCSVWriter(outfname, DELIMITER)
        for fname in infnames:
            csvreader, inf = getCSVReader(fname, DELIMITER)
            try:
                while csvreader is not None:
                    try:
                        line = next(csvreader)
                        # Bug fix: the original read every record but never
                        # wrote it, leaving the output file empty.
                        csvwriter.writerow(line)
                    except OverflowError as e:
                        # Bug fix: csv readers expose line_num, not .line
                        print(
                            'Overflow on line {} ({})'.format(
                                csvreader.line_num, str(e)))
                    except StopIteration:
                        print('EOF after line {}'.format(csvreader.line_num))
                        csvreader = None
            finally:
                # Close each input even if a record raises
                inf.close()
    except Exception as e:
        # Guard: csvreader may still be None if the failure happened
        # before/while opening a reader
        lineno = csvreader.line_num if csvreader is not None else 0
        print('Bad record on line {} ({})'.format(lineno, e))
    finally:
        # Bug fix: the output handle was never closed
        if outf is not None:
            outf.close()
def merge(self):
    """
    @summary: Merge the sorted split files into one sorted output file.
              Writes the header row first, then repeatedly emits the
              smallest remaining record across all split-file readers
              until every reader is exhausted.
    """
    self._setLogger(self.tidyfile.replace('csv', 'log'))
    reader_recs = self._getSplitReadersFirstRecs()
    csv_writer, out_handle = getCSVWriter(self.tidyfile, self.delimiter)
    self._openfiles[self.tidyfile] = out_handle
    # First row out is the header; subsequent rows come from the k-way merge
    row = self._getHeader()
    while row is not None:
        csv_writer.writerow(row)
        row = self._getSmallestRec(reader_recs)
    self.closeOne(self.tidyfile)
def openInputOutput(self):
    '''
    @summary: Read GBIF metadata, open GBIF interpreted data for reading,
              output file for writing, and emit the output header row.
    '''
    self.fldMeta = self.getFieldMeta()
    self._iCsvrdr, self._if = getCSVReader(self.interpFname, DELIMITER)
    self._outWriter, self._outf = getCSVWriter(
        self.outFname, DELIMITER, doAppend=False)
    # Start the output file with its column headers
    self._outWriter.writerow(ORDERED_OUT_FIELDS)
    self._log.info('Opened input/output files')
def _openWriter(self, fname):
    """Open *fname* for CSV writing, remember its file handle for later
    cleanup, and return the writer object."""
    csv_writer, handle = getCSVWriter(fname, self.delimiter)
    self._openfiles[fname] = handle
    return csv_writer
# read lookup vals into dictionary while (line is not None): line, recno = self.getLine(csvRdr, recno) if line and len(line) > 0: try: # First item is dict key, rest are vals lookupDict[line[0]] = line[1:] except Exception, e: self._log.warn( 'Failed to read line {} from {}'.format( recno, fname)) self._log.info('Read lookup file {}'.format(fname)) finally: infile.close() outWriter, outfile = getCSVWriter(fname, DELIMITER, doAppend=doAppend) self._log.info('Re-opened lookup file {} for appending'.format(fname)) if not doAppend and header is not None: outWriter.writerow(header) return lookupDict, outWriter, outfile # ............................................... def openInputOutput(self): ''' @summary: Read GBIF metadata, open GBIF interpreted data for reading, output file for writing ''' self.fldMeta = self.getFieldMeta()
def _getNextWriter(bigFname, currFnum):
    """Open a fresh (non-append) CSV writer for the next numbered chunk
    of *bigFname*, named '<base>_<currFnum><ext>'.

    Returns the (writer, file handle) pair from getCSVWriter.
    """
    base, ext = os.path.splitext(bigFname)
    chunk_fname = '{}_{}{}'.format(base, currFnum, ext)
    return getCSVWriter(chunk_fname, DELIMITER, doAppend=False)