Ejemplo n.º 1
0
    def read(self, wsname):
        """Load a CSV worksheet into a GDatasetList.

        ``wsname`` may be given with or without an extension. Without one,
        ".csv" is appended to build the file name; with one, the extension
        must be listed in ``self._exts``. The file is opened at
        ``self._path + <file name>``.

        The first CSV row is taken as the header and must contain a "uid"
        column. Every following row becomes a GDataset whose ``_fields``
        maps column name to cell text, added under its 0-based row index.

        Returns the GDatasetList, labelled with ``wsname`` stripped of its
        extension.

        Raises ValueError if the extension is not supported or if the header
        lacks "uid" (an empty file counts as a header without "uid").
        """
        label, ext = os.path.splitext(wsname)
        if ext == "":
            csv_name = wsname + ".csv"
        elif ext in self._exts:
            csv_name = wsname
        else:
            raise ValueError("Filetype " + ext + " not supported (" + wsname + ")")

        # The context manager closes the file on the error paths as well.
        with open(self._path + csv_name) as csv_file:
            reader = csv.reader(csv_file, delimiter=",")
            # An empty file yields an empty header instead of leaking a bare
            # StopIteration to the caller.
            columns = next(reader, [])
            if "uid" not in columns:
                raise ValueError("Unique id not defined in " + self._path)

            # Single-argument print(...) prints the same on Python 2 and 3.
            print(columns)
            datasets = GDatasetList(label)
            datasets._columns = columns[:]
            for i, row in enumerate(reader):
                gds = GDataset()
                gds._fields = dict(zip(columns, row))
                datasets.add(i, gds)

        return datasets
Ejemplo n.º 2
0
    def read(self, wsname):
        """Load a CSV worksheet into a GDatasetList.

        ``wsname`` may be given with or without an extension. Without one,
        '.csv' is appended to build the file name; with one, the extension
        must be listed in ``self._exts``. The file is opened at
        ``self._path + <file name>``.

        The first CSV row is taken as the header and must contain a 'uid'
        column. Every following row becomes a GDataset whose ``_fields``
        maps column name to cell text, added under its 0-based row index.

        Returns the GDatasetList, labelled with ``wsname`` stripped of its
        extension.

        Raises ValueError if the extension is not supported or if the header
        lacks 'uid' (an empty file counts as a header without 'uid').
        """
        label, ext = os.path.splitext(wsname)
        if ext == '':
            csv_name = wsname + '.csv'
        elif ext in self._exts:
            csv_name = wsname
        else:
            raise ValueError('Filetype ' + ext + ' not supported (' + wsname +
                             ')')

        # The context manager closes the file on the error paths as well.
        with open(self._path + csv_name) as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            # An empty file yields an empty header instead of leaking a bare
            # StopIteration to the caller.
            columns = next(reader, [])
            if 'uid' not in columns:
                raise ValueError('Unique id not defined in ' + self._path)

            # Single-argument print(...) prints the same on Python 2 and 3.
            print(columns)
            datasets = GDatasetList(label)
            datasets._columns = columns[:]
            for i, row in enumerate(reader):
                gds = GDataset()
                gds._fields = dict(zip(columns, row))
                datasets.add(i, gds)

        return datasets
Ejemplo n.º 3
0
    def read(self, wsname):
        """Read the worksheet titled ``wsname`` through the public list feed.

        The spreadsheet feed at ``self.feedUrl()`` is searched for an entry
        whose title equals ``wsname``; the worksheet key is the last
        '/'-separated component of that entry's id. Each list-feed entry is
        one row: its extension elements whose tag does not start with '_'
        are the cells, keyed by tag. A missing cell text is stored as ''.

        Column order is recorded from the first time each tag is seen. A tag
        showing up at a different position in a later row is treated as a
        corrupted table.

        Returns a GDatasetList named ``wsname`` with one GDataset per row
        (added under the 0-based row index), or None if no worksheet has
        that title.

        Raises ValueError if a tag changes position between rows or if a
        row has no 'uid' cell.
        """
        spreadsheet_feed = self._client.GetFeed(self.feedUrl())
        titles = [entry.title.text for entry in spreadsheet_feed.entry]
        try:
            tab_index = titles.index(wsname)
        except ValueError:
            # Only "title not found" means "no such worksheet"; anything
            # else is a real error and must not be swallowed.
            return None

        ws_entry = spreadsheet_feed.entry[tab_index]
        wskey = ws_entry.id.text.rsplit('/')[-1]

        ws_feedurl = ('https://spreadsheet.google.com/feeds/list/' +
                      self._skey + '/' + wskey + '/public/values')
        worksheets_feed = self._client.GetFeed(ws_feedurl)

        # Parse and validate every row before building the result container.
        columns = []    # tags in first-seen order
        position = {}   # tag -> column index at which it was first seen
        rows = []
        for i, entry in enumerate(worksheets_feed.entry):
            fields = {}
            column_id = 0
            for elem in entry.extension_elements:
                tag = elem.tag
                # Tags with a leading underscore are not data columns.
                if tag.startswith('_'):
                    continue

                fields[tag] = elem.text or ''

                if tag not in position:
                    position[tag] = column_id
                    columns.append(tag)
                elif column_id != position[tag]:
                    raise ValueError('Corrupted Table: tag/column ' +
                                     tag + ' index mismatch! j = ' +
                                     str(column_id) + ' header[tag] = ' +
                                     str(position[tag]) +
                                     '. Missing column name')

                column_id += 1

            if 'uid' not in fields:
                raise ValueError('Unique id not defined for in row ' + str(i))
            rows.append(fields)

        datasets = GDatasetList(wsname)
        datasets._columns.extend(columns)
        for i, fields in enumerate(rows):
            gds = GDataset()
            gds._fields = fields
            datasets.add(i, gds)

        return datasets
Ejemplo n.º 4
0
    def read(self, wsname):
        """Read the worksheet titled ``wsname`` through the public list feed.

        The spreadsheet feed at ``self.feedUrl()`` is searched for an entry
        whose title equals ``wsname``; the worksheet key is the last
        '/'-separated component of that entry's id. Each list-feed entry is
        one row: its extension elements whose tag does not start with '_'
        are the cells, keyed by tag. A missing cell text is stored as ''.

        Column order is recorded from the first time each tag is seen. A tag
        showing up at a different position in a later row is treated as a
        corrupted table.

        Returns a GDatasetList named ``wsname`` with one GDataset per row
        (added under the 0-based row index), or None if no worksheet has
        that title.

        Raises ValueError if a tag changes position between rows or if a
        row has no 'uid' cell.
        """
        spreadsheet_feed = self._client.GetFeed(self.feedUrl())
        titles = [entry.title.text for entry in spreadsheet_feed.entry]
        try:
            tab_index = titles.index(wsname)
        except ValueError:
            # Only "title not found" means "no such worksheet"; anything
            # else is a real error and must not be swallowed.
            return None

        ws_entry = spreadsheet_feed.entry[tab_index]
        wskey = ws_entry.id.text.rsplit('/')[-1]

        ws_feedurl = ('https://spreadsheet.google.com/feeds/list/' +
                      self._skey + '/' + wskey + '/public/values')
        worksheets_feed = self._client.GetFeed(ws_feedurl)

        # Parse and validate every row before building the result container.
        columns = []    # tags in first-seen order
        position = {}   # tag -> column index at which it was first seen
        rows = []
        for i, entry in enumerate(worksheets_feed.entry):
            fields = {}
            column_id = 0
            for elem in entry.extension_elements:
                tag = elem.tag
                # Tags with a leading underscore are not data columns.
                if tag.startswith('_'):
                    continue

                fields[tag] = elem.text or ''

                if tag not in position:
                    position[tag] = column_id
                    columns.append(tag)
                elif column_id != position[tag]:
                    raise ValueError('Corrupted Table: tag/column ' + tag +
                                     ' index mismatch! j = ' + str(column_id) +
                                     ' header[tag] = ' + str(position[tag]) +
                                     '. Missing column name')

                column_id += 1

            if 'uid' not in fields:
                raise ValueError('Unique id not defined for in row ' + str(i))
            rows.append(fields)

        datasets = GDatasetList(wsname)
        datasets._columns.extend(columns)
        for i, fields in enumerate(rows):
            gds = GDataset()
            gds._fields = fields
            datasets.add(i, gds)

        return datasets
Ejemplo n.º 5
0
    def read(self, wsname):
        """Read the worksheet titled ``wsname`` through the public cells feed.

        The spreadsheet feed at ``self.feedUrl()`` is searched for an entry
        whose title equals ``wsname``; the worksheet key is the last
        '/'-separated component of that entry's id. Every cell-feed entry
        carries, on its first extension element, 'row' and 'col' attributes
        (used as 1-based indices) and the cell text. Cells absent from the
        feed stay None in the reconstructed table.

        The first table row is the header and must contain the 'uid' and
        'events' columns. Data rows where either of them is empty are
        skipped; the others become GDatasets added under their 0-based
        position among the data rows (skipped rows keep their index).

        Returns a GDatasetList named ``wsname``, or None if no worksheet has
        that title.

        Raises RuntimeError if a required column is missing from the header
        (an empty worksheet counts as having no header).
        """
        spreadsheet_feed = self._client.GetFeed(self.feedUrl())
        titles = [entry.title.text for entry in spreadsheet_feed.entry]
        try:
            tab_index = titles.index(wsname)
        except ValueError:
            # Only "title not found" means "no such worksheet"; anything
            # else is a real error and must not be swallowed.
            return None

        ws_entry = spreadsheet_feed.entry[tab_index]
        wskey = ws_entry.id.text.rsplit('/')[-1]

        # can be retrieved from the entry itself
        ws_feedurl = ('https://spreadsheet.google.com/feeds/cells/' +
                      self._skey + '/' + wskey + '/public/values')
        cell_feed = self._client.GetFeed(ws_feedurl)

        # Parse the coordinates once; they size the table and then fill it.
        cells = []
        for entry in cell_feed.entry:
            element = entry.extension_elements[0]
            cells.append((int(element.attributes['row']),
                          int(element.attributes['col']),
                          element.text))

        # An empty feed gives an empty table rather than a max() failure.
        if cells:
            nrows = max(r for r, _, _ in cells)
            ncols = max(c for _, c, _ in cells)
        else:
            nrows = ncols = 0

        table = [[None] * ncols for _ in range(nrows)]
        for r, c, text in cells:
            table[r - 1][c - 1] = text

        # the header is the first row. This might change
        columns = table.pop(0) if table else []
        required = ['uid', 'events']
        if not all(key in columns for key in required):
            raise RuntimeError('The columns ' + ', '.join(required) +
                               ' are required. Add them to the table')

        datasets = GDatasetList(wsname)
        datasets._columns = columns

        for r, row in enumerate(table):
            fields = dict(zip(columns, row))
            # skip rows with an empty required field
            if any(not fields[key] for key in required):
                continue
            gds = GDataset()
            gds._fields = fields
            datasets.add(r, gds)

        return datasets
Ejemplo n.º 6
0
    def read(self, wsname):
        """Read the worksheet titled ``wsname`` through the public cells feed.

        The spreadsheet feed at ``self.feedUrl()`` is searched for an entry
        whose title equals ``wsname``; the worksheet key is the last
        '/'-separated component of that entry's id. Every cell-feed entry
        carries, on its first extension element, 'row' and 'col' attributes
        (used as 1-based indices) and the cell text. Cells absent from the
        feed stay None in the reconstructed table.

        The first table row is the header and must contain the 'uid' and
        'events' columns. Data rows where either of them is empty are
        skipped; the others become GDatasets added under their 0-based
        position among the data rows (skipped rows keep their index).

        Returns a GDatasetList named ``wsname``, or None if no worksheet has
        that title.

        Raises RuntimeError if a required column is missing from the header
        (an empty worksheet counts as having no header).
        """
        spreadsheet_feed = self._client.GetFeed(self.feedUrl())
        titles = [entry.title.text for entry in spreadsheet_feed.entry]
        try:
            tab_index = titles.index(wsname)
        except ValueError:
            # Only "title not found" means "no such worksheet"; anything
            # else is a real error and must not be swallowed.
            return None

        ws_entry = spreadsheet_feed.entry[tab_index]
        wskey = ws_entry.id.text.rsplit('/')[-1]

        # can be retrieved from the entry itself
        ws_feedurl = ('https://spreadsheet.google.com/feeds/cells/' +
                      self._skey + '/' + wskey + '/public/values')
        cell_feed = self._client.GetFeed(ws_feedurl)

        # Parse the coordinates once; they size the table and then fill it.
        cells = []
        for entry in cell_feed.entry:
            element = entry.extension_elements[0]
            cells.append((int(element.attributes['row']),
                          int(element.attributes['col']),
                          element.text))

        # An empty feed gives an empty table rather than a max() failure.
        if cells:
            nrows = max(r for r, _, _ in cells)
            ncols = max(c for _, c, _ in cells)
        else:
            nrows = ncols = 0

        table = [[None] * ncols for _ in range(nrows)]
        for r, c, text in cells:
            table[r - 1][c - 1] = text

        # the header is the first row. This might change
        columns = table.pop(0) if table else []
        required = ['uid', 'events']
        if not all(key in columns for key in required):
            raise RuntimeError('The columns ' + ', '.join(required) +
                               ' are required. Add them to the table')

        datasets = GDatasetList(wsname)
        datasets._columns = columns

        for r, row in enumerate(table):
            fields = dict(zip(columns, row))
            # skip rows with an empty required field
            if any(not fields[key] for key in required):
                continue
            gds = GDataset()
            gds._fields = fields
            datasets.add(r, gds)

        return datasets