Example #1
0
def concatenateLookups(filepath, outfname, pattern=None, fnames=None):
   '''
   @summary: Concatenate named files or files matching pattern into a single
             file.
   @param filepath: Directory holding the input files; it is also joined
                    onto outfname, so the output lands in the same directory
   @param outfname: Basename of output file
   @param pattern: Glob pattern (relative to filepath) selecting input files
   @param fnames: Basenames of one or more input files; these are appended
                  after any files matched by pattern
   '''
   outfname = os.path.join(filepath, outfname)
   infnames = []
   try:
      # NOTE(review): the writer is opened before the glob below runs, so a
      # pattern that also matches outfname would presumably pick the output
      # file up as an input -- confirm against callers.
      csvwriter, outf = getCSVWriter(outfname, DELIMITER)

      # Both selection mechanisms may be used together: pattern matches come
      # first, explicitly named files are added after them.
      if pattern is not None:
         infnames = glob.glob(os.path.join(filepath, pattern))
      if fnames is not None:
         for fn in fnames:
            infnames.append(os.path.join(filepath, fn))            

      for fname in infnames:
         csvreader, inf = getCSVReader(fname, DELIMITER)
         # Rows are pulled one at a time so that a failing row is reported
         # and skipped instead of ending the read of the whole file.
         # csvreader doubles as the loop flag: it is set to None at EOF.
         while csvreader is not None:
            try:
               line = csvreader.next()
            # NOTE(review): the handlers below read csvreader.line_num, but
            # this one reads csvreader.line; the stdlib csv reader has no
            # `line` attribute -- confirm what getCSVReader returns.
            except OverflowError, e:
               print( 'Overflow on line {} ({})'.format(csvreader.line, str(e)))
            except StopIteration:
               # End of this input: report, stop the loop, release the file
               print('EOF after line {}'.format(csvreader.line_num))
               csvreader = None
               inf.close()
            except Exception, e:
               # Any other failure is reported and the loop moves on
               print('Bad record on line {} ({})'.format(csvreader.line_num, e))
Example #2
0
   def merge(self):
      """
      @summary: Merge sorted files into a single larger sorted file.
      @note: The log file is named after self.tidyfile with its extension
             replaced by '.log'.
      @note: The header returned by _getHeader is written first, then the
             record returned by each call to _getSmallestRec until it
             returns None.
      @note: The output file is closed through closeOne even when reading
             or writing fails part way through.
      """
      import os

      # Swap only the extension.  A plain str.replace('csv', 'log') would
      # also rewrite 'csv' inside directory or base names, and would leave
      # the name untouched (logging into the data file) when it has no 'csv'.
      logbasename, _ = os.path.splitext(self.tidyfile)
      self._setLogger(logbasename + '.log')

      rdrRecs = self._getSplitReadersFirstRecs()
      writer, outf = getCSVWriter(self.tidyfile, self.delimiter)
      # Register the handle so closeOne can find it by filename
      self._openfiles[self.tidyfile] = outf

      try:
         rec = self._getHeader()
         while rec is not None:
            writer.writerow(rec)
            rec = self._getSmallestRec(rdrRecs)
      finally:
         # Release the output file on success and on failure alike
         self.closeOne(self.tidyfile)
Example #3
0
    def openInputOutput(self):
        """
        @summary: Load the GBIF field metadata, then open the interpreted
                  GBIF data for reading and the output file for writing.
        @note: The output file is opened with doAppend=False and
               ORDERED_OUT_FIELDS is written to it as the header row.
        """
        self.fldMeta = self.getFieldMeta()

        # Input side: reader plus the file object it was opened on
        readerAndFile = getCSVReader(self.interpFname, DELIMITER)
        self._iCsvrdr, self._if = readerAndFile

        # Output side: writer plus the file object it was opened on
        writerAndFile = getCSVWriter(
            self.outFname, DELIMITER, doAppend=False)
        self._outWriter, self._outf = writerAndFile

        # The header row goes out before any data records
        self._outWriter.writerow(ORDERED_OUT_FIELDS)
        self._log.info('Opened input/output files')
Example #4
0
 def _openWriter(self, fname):
    """
    @summary: Open a CSV writer on fname using this object's delimiter and
              record the second value returned by getCSVWriter in
              self._openfiles under fname.
    @param fname: Name of the file to write; also the key in self._openfiles
    @return: The writer object returned by getCSVWriter
    """
    csvWriter, handle = getCSVWriter(fname, self.delimiter)
    self._openfiles[fname] = handle
    return csvWriter
Example #5
0
                # read lookup vals into dictionary
                while (line is not None):
                    line, recno = self.getLine(csvRdr, recno)
                    if line and len(line) > 0:
                        try:
                            # First item is dict key, rest are vals
                            lookupDict[line[0]] = line[1:]
                        except Exception, e:
                            self._log.warn(
                                'Failed to read line {} from {}'.format(
                                    recno, fname))
                self._log.info('Read lookup file {}'.format(fname))
            finally:
                infile.close()

        outWriter, outfile = getCSVWriter(fname, DELIMITER, doAppend=doAppend)
        self._log.info('Re-opened lookup file {} for appending'.format(fname))

        if not doAppend and header is not None:
            outWriter.writerow(header)

        return lookupDict, outWriter, outfile

    # ...............................................
    def openInputOutput(self):
        '''
        @summary: Read GBIF metadata, open GBIF interpreted data for reading,
                  output file for writing
        '''
        # Field metadata comes from getFieldMeta() and is kept on the instance
        self.fldMeta = self.getFieldMeta()
Example #6
0
def _getNextWriter(bigFname, currFnum):
   """
   @summary: Open a CSV writer on a numbered sibling of bigFname.
   @param bigFname: Filename whose base and extension are reused
   @param currFnum: Value inserted, after an underscore, between the base
                    name and the extension
   @return: The (writer, file) pair returned by getCSVWriter, opened with
            doAppend=False
   """
   stem, suffix = os.path.splitext(bigFname)
   # <stem>_<currFnum><suffix>, e.g. the extension stays at the very end
   numberedFname = '{}_{}{}'.format(stem, currFnum, suffix)
   writer, handle = getCSVWriter(numberedFname, DELIMITER, doAppend=False)
   return writer, handle