Exemple #1
0
class SpreadSheet:
    """Small convenience wrapper around a logged-in ``SpreadsheetsService``.

    A document is selected by title with ``set_document``; the remaining
    methods then operate on that document through ``self.current_key``.
    """

    def __init__(self, user, pw):
        """Create the service client and log in with ``user`` / ``pw``."""
        self.document_name = ''
        self.current_key = ''

        # Login process
        self.gd_client = SpreadsheetsService()
        self.gd_client.email = user
        self.gd_client.password = pw
        self.gd_client.ProgrammaticLogin()

    def set_document(self, name):
        """Select the spreadsheet titled ``name`` as the current document.

        Raises:
            ValueError: if no spreadsheet in the feed has that title.
        """
        self.document_name = name
        self._get_current_key()

    def _get_current_key(self):
        """Look up ``self.document_name`` in the spreadsheets feed.

        Stores the last path segment of the matching entry's id in
        ``self.current_key``.  The first entry with a matching title wins.

        Raises:
            ValueError: if no entry matches (including an empty feed).
        """
        feed = self.gd_client.GetSpreadsheetsFeed()
        for entry in feed.entry:
            if entry.title.text == self.document_name:
                self.current_key = entry.id.text.split('/')[-1]
                return
        # Only reached after every entry has been inspected without a match.
        raise ValueError('document no found')

    def _get_worksheet_index(self):
        """Return the index of the last worksheet in the current document."""
        worksheets = self.gd_client.GetWorksheetsFeed(key=self.current_key)
        return len(worksheets.entry) - 1

    def get_current_key(self):
        """Return the key of the currently selected document."""
        return self.current_key

    def get_worksheet_id(self):
        """Return the id of the last worksheet in the current document.

        The id is the last path segment of the worksheet entry's id URL.
        The feed is fetched once so the chosen entry and its id come from
        the same snapshot.
        """
        worksheets = self.gd_client.GetWorksheetsFeed(key=self.current_key)
        return worksheets.entry[-1].id.text.rsplit('/', 1)[1]

    def create_worksheet(self, name):
        """Add a 100-row by 20-column worksheet titled ``name``."""
        self.gd_client.AddWorksheet(title=name,
                                    row_count=100,
                                    col_count=20,
                                    key=self.current_key)

    def insert_row(self, data, wksht_id):
        """Write ``data`` into worksheet ``wksht_id`` starting at row 1, column 1.

        ``data`` is an iterable of rows, each an iterable of cell values;
        every value is written with its own ``UpdateCell`` call.
        """
        for row_idx, row in enumerate(data, 1):
            for col_idx, value in enumerate(row, 1):
                self.gd_client.UpdateCell(row=row_idx,
                                          col=col_idx,
                                          inputValue=value,
                                          key=self.current_key,
                                          wksht_id=wksht_id)
Exemple #2
0
class GDataClient(object):
    ''' Wrapper around a logged-in SpreadsheetsService that tracks a "current"
    spreadsheet document and a "current" worksheet within it.

    Most methods require a spreadsheet to have been selected first (see
    set_document) and raise GDError otherwise.
    '''
    # Logged-in service client; assigned in __init__.
    gd_client = None
    # Key of the currently selected spreadsheet document (None until set).
    gd_cur_ss_key = None
    # Id of the currently selected worksheet (None until set).
    gd_cur_ws_id = None

    def __init__(self, uname, pswd, document_name=None, worksheet_name=None):
        ''' Log in and optionally select a document and a worksheet by title.
        '''
        # Connect to Google
        self.gd_client = SpreadsheetsService(email=uname, password=pswd, source=GDATA_SOURCE)
        self.gd_client.ProgrammaticLogin()
        if document_name is not None:
            self.set_document(document_name)
        if worksheet_name is not None:
            self.set_worksheet(worksheet_name)

    def __ss_check(self):
        ''' Make sure spreadsheet has been set before we try to do anything with worksheets.
        '''
        if self.gd_cur_ss_key is None:
            raise GDError('Must set spreadsheet before accessing worksheets!')

    def __header_to_key(self, hdr_string):
        ''' Convert a column header into the key used in row info dictionaries.

        The header is lower-cased and every character other than a-z, 0-9,
        '-' and '.' is dropped.
        '''
        # Raw string: same pattern as before, without invalid string escapes.
        return ''.join(re.findall(r'[a-z\-0-9\.]+', hdr_string.lower()))

    def set_document(self, docname):
        ''' Set current spreadsheet document given a title.

        Raises GDError unless exactly one spreadsheet matches the title.
        '''
        q = DocumentQuery(params={'title': docname, 'title-exact': 'true'})
        ss_feed = self.gd_client.GetSpreadsheetsFeed(query=q)
        if len(ss_feed.entry) != 1:
            raise GDError('{} spreadsheets match the given name "{}" (expected exactly one)!'.format(len(ss_feed.entry),docname))
        # The key is the last path segment of the entry id.
        self.gd_cur_ss_key = ss_feed.entry[0].id.text.rsplit('/', 1)[1]

    def list_documents(self):
        ''' List the titles of all spreadsheet documents available.
        '''
        # Pass the query by keyword, as set_document does, so it is not
        # taken for the first positional parameter of GetSpreadsheetsFeed.
        feed = self.gd_client.GetSpreadsheetsFeed(query=DocumentQuery())
        return [en.title.text for en in feed.entry]

    def list_worksheets(self):
        ''' List the titles of all worksheets in the current spreadsheet document.
        '''
        self.__ss_check()
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key)
        return [en.title.text for en in ws_feed.entry]

    def set_worksheet(self, sheetname):
        ''' Set current worksheet within the current spreadsheet document.

        Raises GDError unless exactly one worksheet matches the title.
        '''
        self.__ss_check()
        q = DocumentQuery(params={'title': sheetname, 'title-exact': 'true'})
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, query=q)
        if len(ws_feed.entry) != 1:
            raise GDError('{} worksheets match the given name "{}" (expected exactly one)!'.format(len(ws_feed.entry),sheetname))
        self.gd_cur_ws_id = ws_feed.entry[0].id.text.rsplit('/', 1)[1]

    def add_worksheet(self, title, rows, cols, overwrite=False):
        ''' Add a worksheet to current spreadsheet document (if it does not exist).  Switch current sheet to the (new) guy.

        If a worksheet with this title exists it is reused, unless overwrite
        is true, in which case the first match is deleted and a fresh
        worksheet is created.

        @return True if a new worksheet was created, False if an existing one was reused.
        '''
        self.__ss_check()

        # First, check to see if the worksheet already exists
        q = DocumentQuery(params={'title': title, 'title-exact': 'true'})
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, query=q)
        ws_found = None
        if len(ws_feed.entry) > 0:
            if overwrite:
                if not self.gd_client.DeleteWorksheet(ws_feed.entry[0]):
                    raise GDError('Failed to delete existing worksheet named {} to overwrite!'.format(title))
            else:
                ws_found = ws_feed.entry[0]

        is_new = ws_found is None
        if is_new:
            ws_found = self.gd_client.AddWorksheet(title, rows, cols, self.gd_cur_ss_key)

        self.gd_cur_ws_id = ws_found.id.text.rsplit('/', 1)[1]

        return is_new

    def set_headers(self, header_names):
        ''' Set the current worksheet headers given a list.
        NOTE: header list length must match columns in sheet!
        '''
        self.__ss_check()
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        num_cols = int(ws.col_count.text)
        if num_cols != len(header_names):
            raise GDError('Number of headers ({}) does not match columns in spreadsheet ({})!'
                          .format(len(header_names),num_cols))

        # Always request empty cells too.  Without 'return-empty' only the
        # non-empty cells come back, so a partially filled header row would
        # yield fewer entries than headers and misalign the updates below.
        query = CellQuery(params={'min-row': '1', 'max-row': '1',
                                  'min-col': '1', 'max-col': str(len(header_names)),
                                  'return-empty': 'true'})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id, query=query)

        batch_request = SpreadsheetsCellsFeed()
        for idx, val in enumerate(header_names):
            cell_entry = cells.entry[idx]
            cell_entry.cell.inputValue = val
            batch_request.AddUpdate(cell_entry)

        self.gd_client.ExecuteBatch(batch_request, cells.GetBatchLink().href)

    def delete_rows(self, row_list):
        ''' Given a list of row numbers, delete the corresponding rows from the current sheet.

        Rows are deleted from the highest index down; duplicate indices are
        deleted only once.

        @param row_list List of integers in 0:num_rows
        '''
        if len(row_list) == 0:
            return

        self.__ss_check()
        list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        # De-duplicate so the same feed entry is never deleted twice.
        ordered_list = sorted(set(row_list), reverse=True)
        num_rows = int(list_feed.total_results.text)
        # Valid indices are 0..num_rows-1, so num_rows itself is out of range.
        if ordered_list[0] >= num_rows:
            raise GDError('Tried to delete row {} but highest row number in sheet is {}!'.format(ordered_list[0],num_rows - 1))

        for row in ordered_list:
            if not self.gd_client.DeleteRow(list_feed.entry[row]):
                raise GDError('Failed to delete row {} (partway through list: {})'.format(row,ordered_list))

    def insert_rows(self, info_dict_list):
        ''' Add a group of rows based on list of dictionaries.

        Raises GDError (before inserting anything) if any dictionary has a
        key that is not among the keys derived from the header row.

        @param info_dict_list List of dictionaries with keys for any headers with non-blank info in the new row
        (all missing keys will be have blank data).
        '''
        self.__ss_check()
        # Check and make sure that none of the input dictionaries contains keys not in the first row of our list.
        hdr_list = [self.__header_to_key(hdr) for hdr in self.row_as_list(1)]
        known_keys = set(hdr_list)
        bad_row = next((dd for dd in info_dict_list if set(dd.keys()) - known_keys), None)
        if bad_row is not None:
            raise GDError('Failed to insert row {} because it contains keys not in spreadsheet headers ({})!'
                          .format(bad_row,hdr_list))

        for inf in info_dict_list:
            self.gd_client.InsertRow(inf, self.gd_cur_ss_key, self.gd_cur_ws_id)

    def column_as_list(self, column, with_header=False):
        ''' Read just a single column into a list of strings.

        Ignore the first row by default because it's the column header.
        '''
        self.__ss_check()
        minrow = 1 if with_header else 2
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        if int(ws.row_count.text) < minrow:
            return []

        q = CellQuery(params={'min-row': str(minrow), 'min-col': str(column), 'max-col': str(column)})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id, query=q)

        return [cellentry.cell.text for cellentry in cells.entry]

    def row_as_list(self, row):
        ''' Read just a single row into a list of strings.

        NOTE: this is indexed by cell, so row 1 is the header row!
        '''
        self.__ss_check()
        mincol = 1
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        if int(ws.col_count.text) < mincol:
            return []

        q = CellQuery(params={'min-row': str(row), 'min-col': str(mincol), 'max-row': str(row)})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id, query=q)

        return [cellentry.cell.text for cellentry in cells.entry]

    def read_to_list(self, num_lines=None):
        ''' Read the sheet into a list of dictionaries, one per list-feed row.

        Each dictionary maps the row's column keys to cell text, plus a
        'rowname' key holding the entry title.

        @param num_lines If given, sent as the 'max-results' query parameter.
        '''
        self.__ss_check()
        if num_lines is not None:
            q = DocumentQuery(params={'max-results': '%d' % num_lines})
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id, query=q)
        else:
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)

        string_list = []
        for entry in list_feed.entry:
            row = {key: entry.custom[key].text for key in entry.custom}
            row['rowname'] = entry.title.text
            string_list.append(row)

        return string_list

    def read_to_dict(self, key_column_name, row_start=None, row_num=None):
        ''' Read the sheet into a dictionary with keys given by the named column.

        Each value is a dictionary of the row's other columns plus a
        'rowname' key holding the entry title.

        NOTE: Redundant rows do not raise.  The last row seen for a key wins,
        and the repeated key values are reported in the returned error string.

        @param row_start If given, sent as the 'start-index' query parameter.
        @param row_num If given, sent as the 'max-results' query parameter.
        @return Tuple (dictionary, errors) where errors is None or a message
        listing the key values that occurred in more than one row.
        '''
        self.__ss_check()

        # Use this for testing to limit number of results handled:
        params = {}
        if row_start is not None:
            params['start-index'] = '%d' % row_start
        if row_num is not None:
            params['max-results'] = '%d' % row_num
        if len(params) != 0:
            q = ListQuery(params=params)
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id, query=q)
        else:
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)

        multi_rows = []
        string_dict = {}
        for entry in list_feed.entry:
            key_name = entry.custom[key_column_name].text
            # If we get the title column, ignore it.
            if key_name.replace(' ', '').lower() == key_column_name:
                continue
            # Compare by value: an identity test on strings does not reliably
            # exclude the key column.
            row = {key: entry.custom[key].text for key in entry.custom if key != key_column_name}
            row['rowname'] = entry.title.text
            if key_name in string_dict:
                multi_rows.append(key_name)
            string_dict[key_name] = row

        if len(multi_rows) > 0:
            errors = 'read_to_dict -- Column {} contains multiple rows with each of the following values: {}'.format(key_column_name,multi_rows)
        else:
            errors = None

        return string_dict, errors