def readFromStream(self, stream, thedatasets, block=None):
    """Read data from stream, and write to thedatasets.

    For every index from self.startindex to self.stopindex (inclusive)
    and every entry of self.columns, one value is taken from
    stream.nextColumn(), converted according to self.datatype and
    appended to a list stored in thedatasets.  The method returns as
    soon as nextColumn() returns None.

    stream: object providing a nextColumn() method
    thedatasets: mapping of internal dataset name -> list of values;
        missing entries are created as empty lists
    block: if not None, '_<block>' is appended to the dataset name

    Side effects: self.datatype is set from the first value read if it
    was not already set, and self.errorcount is incremented for every
    value that fails float conversion.
    """

    # loop over column range
    for index in range(self.startindex, self.stopindex + 1):

        # name for variable
        if self.single:
            name = self.name
        else:
            name = '%s_%i' % (self.name, index)

        # if we're reading multiple blocks
        if block is not None:
            name += '_%i' % block

        # loop over columns until we run out, or we don't need any
        for col in self.columns:
            # get next column and return if we run out of data
            val = stream.nextColumn()
            if val is None:
                return

            # append a suffix to specify whether error or value
            # \0 is used as the user cannot enter it
            fullname = '%s\0%s' % (name, col)

            # get dataset (or get new one)
            try:
                dataset = thedatasets[fullname]
            except KeyError:
                dataset = thedatasets[fullname] = []

            if not self.datatype:
                # try to guess type of data
                self.datatype = guessDataType(val)

            # convert according to datatype
            # NOTE(review): there is no branch for any other datatype
            # value; in that case dat keeps its previous value (or is
            # undefined on the first item) - confirm guessDataType only
            # returns 'float', 'string' or 'date'
            if self.datatype == 'float':
                try:
                    # do conversion
                    dat = float(val)
                except ValueError:
                    # unconvertible values are stored as nan and counted
                    dat = N.nan
                    self.errorcount += 1

            elif self.datatype == 'string':
                if string_re.match(val):
                    # SECURITY: eval on file data.  The regular
                    # expression match above is the only thing that
                    # restricts what gets evaluated here.
                    try:
                        dat = eval(val)
                    except Exception:
                        # fall back to the raw text; Exception (not a
                        # bare except) so that KeyboardInterrupt and
                        # SystemExit still propagate
                        dat = val
                else:
                    dat = val

            elif self.datatype == 'date':
                dat = utils.dateStringToDate(val)

            # add data into dataset
            dataset.append(dat)
def readData(self):
    """Read the data into the document.

    Opens self.filename, wraps it in a utils.UnicodeCSVReader using
    self.delimiter, self.textdelimiter and self.encoding, and iterates
    over it by rows or by columns depending on self.readrows.

    Each item is converted according to the type recorded for its
    column (float when none is recorded) and appended to
    self.data[<dataset name>].  An item which cannot be converted is
    either a blank (skipped, or stored as nan when self.blanksaredata
    is set), turns a still-empty dataset into a string dataset, or
    starts a new dataset.

    Resets self.colnames, self.coltypes, self.nametypes and
    self.colignore before reading.

    Raises RuntimeError if a column has a type other than 'float',
    'date' or 'string'.
    """

    # open the csv file; the handle is closed in the finally clause
    csvfile = open(self.filename)
    try:
        csvf = utils.UnicodeCSVReader(
            csvfile,
            delimiter=self.delimiter,
            quotechar=self.textdelimiter,
            encoding=self.encoding)

        # make an iterator for the file
        if self.readrows:
            it = _FileReaderRows(csvf)
        else:
            it = _FileReaderCols(csvf)

        # dataset names for each column (keyed by column number)
        self.colnames = {}
        # type of column (float, string or date)
        self.coltypes = []
        # type of names of columns
        self.nametypes = {}
        # number of items still to ignore after a header, per column;
        # every column starts at int(self.headerignore)
        self.colignore = defaultdict(lambda: int(self.headerignore))

        # iterate over each line (or column)
        while True:
            try:
                line = it.next()
            except StopIteration:
                break

            # iterate over items on line
            for colnum, col in enumerate(line):

                # columns without a recorded type are treated as float
                if (colnum >= len(self.coltypes) or
                        self.coltypes[colnum] == ""):
                    ctype = "float"
                else:
                    ctype = self.coltypes[colnum]

                # ignore lines after headers
                if (colnum < len(self.coltypes) and
                        self.colignore[colnum] > 0):
                    self.colignore[colnum] -= 1
                    continue

                try:
                    # do any necessary conversion
                    if ctype == "float":
                        v = float(col)
                    elif ctype == "date":
                        v = utils.dateStringToDate(col)
                    elif ctype == "string":
                        v = col
                    else:
                        raise RuntimeError("Invalid type in CSV reader")

                except ValueError:
                    if col.strip() == "":
                        # skip blanks unless blanksaredata is set
                        # colnames is keyed by column number, so test
                        # for membership rather than comparing with its
                        # length before indexing it
                        if self.blanksaredata and colnum in self.colnames:
                            # assumes a numeric data type
                            self.data[self.colnames[colnum]].append(N.nan)
                    elif (colnum in self.colnames and
                          len(self.data[self.colnames[colnum]]) == 0):
                        # if dataset is empty, convert to a string dataset
                        self._setNameAndType(
                            colnum, self.colnames[colnum], "string")
                        self.data[self.colnames[colnum]].append(col)
                    else:
                        # start a new dataset if conversion failed
                        coltype, name = self._getNameAndColType(colnum, col)
                        self._setNameAndType(colnum, name.strip(), coltype)

                else:
                    # generate a name if required
                    if colnum not in self.colnames:
                        self._setNameAndType(
                            colnum, self._generateName(colnum), "float")

                    # conversion okay
                    # append number to data
                    coldata = self.data[self.colnames[colnum]]
                    coldata.append(v)
    finally:
        csvfile.close()