Ejemplo n.º 1
0
    def readFromStream(self, stream, thedatasets, block=None):
        """Read data from stream, and write to thedatasets.

        For every index in the inclusive range self.startindex to
        self.stopindex, one value is taken from the stream for each entry
        of self.columns.  The converted value is appended to the list held
        in thedatasets under the key made of the dataset name, a NUL
        character and the column entry.

        stream -- object providing nextColumn(); a return value of None
                  means there is no more data
        thedatasets -- mapping from dataset key to list of values; keys
                       which are missing are created as empty lists
        block -- optional block number; when not None it is appended to
                 the dataset name as '_<block>'

        Returns None.  Reading stops as soon as the stream runs out.
        Raises RuntimeError if self.datatype is not one of 'float',
        'string' or 'date'.
        """

        # loop over column range
        # (range() behaves the same as xrange() here and also exists
        # on Python 3)
        for index in range(self.startindex, self.stopindex+1):
            # name for variable
            if self.single:
                name = self.name
            else:
                name = '%s_%i' % (self.name, index)

            # if we're reading multiple blocks
            if block is not None:
                name += '_%i' % block

            # loop over columns until we run out, or we don't need any
            for col in self.columns:
                # get next column and return if we run out of data
                val = stream.nextColumn()
                if val is None:
                    return
                # append a suffix to specify whether error or value
                # \0 is used as the user cannot enter it
                fullname = '%s\0%s' % (name, col)

                # get dataset (or get new one)
                try:
                    dataset = thedatasets[fullname]
                except KeyError:
                    dataset = thedatasets[fullname] = []

                if not self.datatype:
                    # try to guess type of data; the guess is stored on
                    # self, so it is reused for all following values
                    self.datatype = guessDataType(val)

                # convert according to datatype
                if self.datatype == 'float':
                    try:
                        # do conversion
                        dat = float(val)
                    except ValueError:
                        # unparsable numbers become nan and are counted
                        dat = N.nan
                        self.errorcount += 1

                elif self.datatype == 'string':
                    if string_re.match(val):
                        # SECURITY: eval() is applied to text from the
                        # input; this is only as safe as string_re is
                        # strict, so keep that expression restrictive.
                        try:
                            dat = eval(val)
                        except Exception:
                            # not a valid literal after all: keep raw
                            # text (Exception, not a bare except, so
                            # KeyboardInterrupt/SystemExit propagate)
                            dat = val
                    else:
                        dat = val

                elif self.datatype == 'date':
                    dat = utils.dateStringToDate(val)

                else:
                    # previously fell through with no value assigned
                    raise RuntimeError(
                        "Invalid data type %r in reader" % (self.datatype,))

                # add data into dataset
                dataset.append(dat)
Ejemplo n.º 2
0
    def readData(self):
        """Read the data into the document.

        Opens self.filename as CSV (using self.delimiter,
        self.textdelimiter and self.encoding) and walks it by rows or by
        columns depending on self.readrows.  Each item is converted
        according to the type recorded for its column and appended to the
        matching list in self.data.  Items which fail conversion either
        start a new dataset, turn a still-empty dataset into a string
        dataset, or (for blanks) are skipped unless self.blanksaredata is
        set.

        self.colnames, self.coltypes, self.nametypes and self.colignore
        are reset here; self.data is not created here and must already
        exist.

        Raises RuntimeError if a column has an unrecognised type.
        """

        # open the csv file, keeping the handle so it is always closed
        fileobj = open(self.filename)
        try:
            csvf = utils.UnicodeCSVReader(
                fileobj,
                delimiter=self.delimiter,
                quotechar=self.textdelimiter,
                encoding=self.encoding,
            )

            # make in iterator for the file
            if self.readrows:
                it = _FileReaderRows(csvf)
            else:
                it = _FileReaderCols(csvf)

            # dataset names for each column
            self.colnames = {}
            # type of column (float, string or date)
            # NOTE(review): presumably filled in by _setNameAndType - confirm
            self.coltypes = []
            # type of names of columns
            self.nametypes = {}
            # ignore lines after headers
            self.colignore = defaultdict(lambda: int(self.headerignore))

            # iterate over each line (or column)
            while True:
                try:
                    line = it.next()
                except StopIteration:
                    break

                # iterate over items on line
                for colnum, col in enumerate(line):

                    # columns without a recorded type are treated as float
                    if (colnum >= len(self.coltypes) or
                            self.coltypes[colnum] == ""):
                        ctype = "float"
                    else:
                        ctype = self.coltypes[colnum]

                    # ignore lines after headers
                    if (colnum < len(self.coltypes) and
                            self.colignore[colnum] > 0):
                        self.colignore[colnum] -= 1
                        continue

                    try:
                        # do any necessary conversion
                        if ctype == "float":
                            v = float(col)
                        elif ctype == "date":
                            v = utils.dateStringToDate(col)
                        elif ctype == "string":
                            v = col
                        else:
                            raise RuntimeError("Invalid type in CSV reader")

                    except ValueError:
                        if col.strip() == "":
                            # skip blanks unless blanksaredata is set
                            # colnames is keyed by column number, so test
                            # membership rather than comparing to its size
                            if self.blanksaredata and colnum in self.colnames:
                                # assumes a numeric data type
                                self.data[self.colnames[colnum]].append(N.nan)
                        elif (colnum in self.colnames and
                              len(self.data[self.colnames[colnum]]) == 0):
                            # if dataset is empty, convert to a string dataset
                            self._setNameAndType(
                                colnum, self.colnames[colnum], "string")
                            self.data[self.colnames[colnum]].append(col)
                        else:
                            # start a new dataset if conversion failed
                            coltype, name = self._getNameAndColType(colnum, col)
                            self._setNameAndType(colnum, name.strip(), coltype)

                    else:
                        # generate a name if required
                        if colnum not in self.colnames:
                            self._setNameAndType(
                                colnum, self._generateName(colnum), "float")

                        # conversion okay
                        # append number to data
                        coldata = self.data[self.colnames[colnum]]
                        coldata.append(v)
        finally:
            fileobj.close()