Beispiel #1
0
class GoogleConn:
    def __init__(self, usr, pwd, form):
        global FORM_COLS
        self.gc = SpreadsheetsService()
        self.gc.ClientLogin(username=usr, password=pwd, source=APP_NAME)
        query = DocumentQuery()
        query.title = form
        feed = self.gc.GetSpreadsheetsFeed(query=query)
        if not feed.entry: raise Exception, "empty spreadsheet list"
        self.formid = feed.entry[0].id.text.rsplit('/', 1)[1]
        #        self.update = feed.entry[0].updated.text
        query = CellQuery()
        query.range = '%c1:%c1' % tuple(i + ord('A')
                                        for i in (0, len(FORM_KEYS) - 1))
        feed = self.gc.GetCellsFeed(self.formid, query=query)
        FORM_COLS = tuple(''.join(c for c in cell.content.text.lower()
                                  if c.isalnum() or c in '-_')
                          for cell in feed.entry)
        self.statcol = FORM_COLS[FORM_KEYS.index('status')]

    def get_requests(self):
        feed = self.gc.GetListFeed(self.formid)
        return [
            Request(row) for row in feed.entry
            if not row.custom[self.statcol].text
        ]

    def update_request(self, request):
        request.entry = self.gc.UpdateRow(request.entry, request.data())
Beispiel #2
0
def get_cos_contributors():
    ''' returns a list of contribtors from a google spreadsheet
    specified by spreadsheet ID '''

    client = SpreadsheetsService()

    feed = client.GetWorksheetsFeed(DOC_KEY, visibility='public', wksht_id='1')
    sheet_key = feed.id.text.rsplit('/', 1)[1]

    list_of_rows = client.GetListFeed(DOC_KEY, sheet_key,
                                      visibility='public').entry
    print('There are {} rows on the COS spreadsheet.'.format(
        len(list_of_rows)))

    cos_users = []
    for row in list_of_rows:
        row_dict = row.custom
        cos_users.append(row_dict[COL_ROW_NAME].text)

    return cos_users
class SheetQuerier:
    key = "0AkGO8UqErL6idDhYYjg1ZXlORnRaM3ZhTks4Z3FrYlE"
    worksheets = {"DVD": "movietitle", "Blu-ray": "movietitle"}
    cache = "spreadsheet_cache.p"
    cache_timeout = 60 * 60 * 24 * 7
    prefixes = ["The", "A"]
    whitespace_pattern = re.compile('[^a-zA-Z0-9 ]+', re.UNICODE)

    def __init__(self, force_reload=False):
        try:
            pickle_date, self.data = pickle.load(open(self.cache, "rb"))
            if DEBUG:
                print "Loaded cache successfully"
            if force_reload or time.time() - pickle_date > self.cache_timeout:
                if DEBUG:
                    if force_reload:
                        print "Cache update forced."
                    else:
                        print "Cache too old, reloading."
                self.reload_data()
                pickle.dump((time.time(), self.data), open(self.cache, "wb"))
        except:
            if DEBUG:
                print "Problem loading cache, reloading."
            self.reload_data()
            pickle.dump((time.time(), self.data), open(self.cache, "wb"))

    def reload_data(self):
        self.client = SpreadsheetsService()
        feed = self.client.GetWorksheetsFeed(self.key,
                                             visibility='public',
                                             projection='basic')
        self.data = {}
        for entry in feed.entry:
            if entry.title.text in self.worksheets.keys():
                bad_rows, total_rows = self.process_sheet(
                    entry.id.text.split("/")[-1],
                    self.worksheets[entry.title.text])
                print "Skipped %d / %d rows in sheet \"%s\"" % (
                    bad_rows, total_rows, entry.title.text)
            elif DEBUG:
                print "Skipped sheet \"%s\"" % entry.title.text

    def process_sheet(self,
                      sheet_key,
                      movie_row_key,
                      type_row_key="forcedsubtitletype"):
        if DEBUG:
            print "Document: %s" % self.key
            print "Sheet: %s" % sheet_key

        rows = self.client.GetListFeed(self.key,
                                       sheet_key,
                                       visibility='public',
                                       projection='values').entry
        bad_rows = 0
        for row in rows:
            try:
                self.data[SheetQuerier.clean_title(
                    row.custom[movie_row_key].text.strip(
                    ))] = row.custom[type_row_key].text.strip()
            except:
                bad_rows += 1
        return bad_rows, len(rows)

    def query_exact(self, title):
        query = SheetQuerier.clean_title(title)
        if query in self.data:
            return self.data[query]
        else:
            return False

    @staticmethod
    def clean_title(title):
        # Move prefixes
        for prefix in SheetQuerier.prefixes:
            if title.endswith(prefix):
                title = "%s %s" % (prefix, title[:-1 * len(prefix) - 2])
                break
        # Strip all non alpha-numeric characters
        title = SheetQuerier.whitespace_pattern.sub('', title)

        # Return the lowercase version
        return title.lower()
Beispiel #4
0
class GDataClient(object):
    gd_client = None
    gd_cur_ss_key = None
    gd_cur_ws_id = None
    
    def __init__(self,uname,pswd,document_name=None,worksheet_name=None):
        # Connect to Google
        self.gd_client = SpreadsheetsService(email=uname,password=pswd,source=GDATA_SOURCE)
        self.gd_client.ProgrammaticLogin()
        if document_name is not None:
            self.set_document(document_name)
        if worksheet_name is not None:
            self.set_worksheet(worksheet_name)
    
    def __ss_check(self):
        ''' Make sure spreadsheet has been set before we try to do anything with worksheets.
        '''
        if self.gd_cur_ss_key is None:
            raise GDError('Must set spreadsheet before accessing worksheets!')

    def __header_to_key(self,hdr_string):
        ''' Google sheets column headers are used as keys to row info dictionaries, but first most non alphanumeric 
        characters are removed and the letters are lower-cased.
            
        '''
        return ''.join(re.findall('[a-z\-0-9\.]+',hdr_string.lower()))

    def set_document(self, docname):
        ''' Set current spreadsheet document given a title.
        '''
        q = DocumentQuery(params={'title':docname,'title-exact':'true'})
        ss_feed = self.gd_client.GetSpreadsheetsFeed(query=q)
        if len(ss_feed.entry) != 1:
            raise GDError('{} spreadsheets match the given name "{}" (expected exactly one)!'.format(len(ss_feed.entry),docname))
        self.gd_cur_ss_key = ss_feed.entry[0].id.text.rsplit('/',1)[1]

    def list_documents(self):
        ''' List all spreadsheet documents available.
        '''
        feed = self.gd_client.GetSpreadsheetsFeed(DocumentQuery())
        return [en.title.text for en in feed.entry]

    def list_worksheets(self):
        ''' List all worksheets in the current spreadsheet document.
        '''
        self.__ss_check()
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key)
        return [en.title.text for en in ws_feed.entry]

    def set_worksheet(self, sheetname):
        ''' Set current worksheet within the current spreadsheet document.
        '''
        self.__ss_check()
        q = DocumentQuery(params={'title':sheetname,'title-exact':'true'})
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key,query=q)
        if len(ws_feed.entry) != 1:
            raise GDError('{} worksheets match the given name "{}" (expected exactly one)!'.format(len(ws_feed.entry),sheetname))
        self.gd_cur_ws_id = ws_feed.entry[0].id.text.rsplit('/',1)[1]

    def add_worksheet(self, title, rows, cols, overwrite=False):
        ''' Add a worksheet to current spreadsheet document (if it does not exist).  Switch current sheet to the (new) guy.
        '''
        self.__ss_check()
        
        # First, check to see if the worksheet already exists
        q = DocumentQuery(params={'title':title,'title-exact':'true'})
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key,query=q)
        ws_found = None
        if len(ws_feed.entry) > 0:
            if overwrite:
                if not self.gd_client.DeleteWorksheet(ws_feed.entry[0]):
                    raise GDError('Failed to delete existing worksheet named {} to overwrite!'.format(title))
            else:
                ws_found = ws_feed.entry[0]

        is_new = ws_found is None
        if is_new:
            ws_found = self.gd_client.AddWorksheet(title,rows,cols,self.gd_cur_ss_key)

        self.gd_cur_ws_id = ws_found.id.text.rsplit('/',1)[1]

        return is_new

    def set_headers(self,header_names):
        ''' Set the current worksheet headers given a list.
        NOTE: header list length must match columns in sheet!
        '''
        self.__ss_check()
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key,self.gd_cur_ws_id)
        if int(ws.col_count.text) != len(header_names):
            raise GDError('Number of headers ({}) does not match columns in spreadsheet ({})!'
                          .format(len(header_names),int(ws.col_count.text)))

        query = CellQuery(params={'min-row':'1','max-row':'1','min-col':'1','max-col':str(len(header_names))})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key,self.gd_cur_ws_id,query=query)
        if len(cells.entry) == 0:
            query = CellQuery(params={'min-row':'1','max-row':'1','min-col':'1','max-col':str(len(header_names)),'return-empty':'true'})
            cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key,self.gd_cur_ws_id,query=query)

        batchRequest = SpreadsheetsCellsFeed()

        for idx,val in enumerate(header_names):
            cells.entry[idx].cell.inputValue = val
            batchRequest.AddUpdate(cells.entry[idx])

        updated = self.gd_client.ExecuteBatch(batchRequest,cells.GetBatchLink().href)


    def delete_rows(self,row_list):
        ''' Given a list of row numbers, delete the corresponding rows from the current sheet.
        @param row_list List of integers in 0:num_rows
        '''
        if len(row_list) == 0:
            return

        self.__ss_check()
        list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        ordered_list = sorted(row_list,reverse=True)
        if ordered_list[0] > int(list_feed.total_results.text):
            raise GDError('Tried to delete row {} but highest row number in sheet is {}!'.format(ordered_list[0],list_feed.total_results.text))

        for row in ordered_list:
            if not self.gd_client.DeleteRow(list_feed.entry[row]):
                raise GDError('Failed to delete row {} (partway through list: {})'.format(row,ordered_list))        
        
    def insert_rows(self,info_dict_list):
        ''' Add a group of rows based on list of dictionaries.

        @param info_dict_list Dictionary with keys for any headers with non-blank info in the new row 
        (all missing keys will be have blank data).
        '''
        self.__ss_check()
        # Check and make sure that none of the input dictionaries contains keys not in the first row of our list.
        hdr_list = [self.__header_to_key(hdr) for hdr in self.row_as_list(1)]
        bad_row = next((dd for dd in info_dict_list if len(set(dd.keys()) - set(hdr_list)) > 0), None)
        if bad_row is not None:
            raise GDError('Failed to insert row {} because it contains keys not in spreadsheet headers ({})!'
                          .format(bad_row,hdr_list))

        list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        for inf in info_dict_list:
            self.gd_client.InsertRow(inf,self.gd_cur_ss_key, self.gd_cur_ws_id)

    def column_as_list(self,column,with_header=False):
        ''' Read just a single column into a list of strings.

        Ignore the first row by default because it's the column header.
        '''
        self.__ss_check()
        if with_header:
            minrow = 1
        else:
            minrow = 2
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key,self.gd_cur_ws_id)
        if int(ws.row_count.text) < minrow:
            return []

        q = CellQuery(params={'min-row':str(minrow),'min-col':str(column),'max-col':str(column)})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key,self.gd_cur_ws_id,query=q)

        return [cellentry.cell.text for cellentry in cells.entry]

    def row_as_list(self,row):
        ''' Read just a single row into a list of strings.

        NOTE: this is indexed by cell, so row 1 is the header row!
        '''
        self.__ss_check()
        mincol = 1
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key,self.gd_cur_ws_id)
        if int(ws.col_count.text) < mincol:
            return []

        q = CellQuery(params={'min-row':str(row),'min-col':str(mincol),'max-row':str(row)})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key,self.gd_cur_ws_id,query=q)

        return [cellentry.cell.text for cellentry in cells.entry]
        

    def read_to_list(self,num_lines=None):
        """ Read the sheet into a list of lists of strings """
        self.__ss_check()
        if num_lines is not None:
            q = DocumentQuery(params={'max-results':'%d'%num_lines})
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id,query=q)
        else:
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)

        string_list = []
        for i, entry in enumerate(list_feed.entry):
            row = {key:entry.custom[key].text for key in entry.custom}
            row['rowname'] = entry.title.text
            string_list.append(row)
        
        return string_list

    def read_to_dict(self,key_column_name,row_start=None,row_num=None):
        """ Read the sheet into a dictionary with keys given by the named column.
        
        NOTE: Raises an error on any redundant rows.
        """

        self.__ss_check()

        # Use this for testing to limit number of results handled:
        params = {}
        if row_start is not None:
            params['start-index'] = '%d'%row_start
        if row_num is not None:
            params['max-results'] = '%d'%row_num
        if len(params) != 0:
            q = ListQuery(params=params)
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id,query=q)
        else:
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)

        multi_rows = []
        string_dict = {}
        for i, entry in enumerate(list_feed.entry):
            # If we get the title column, ignore it.
            if entry.custom[key_column_name].text.replace(' ','').lower() == key_column_name:
                continue
            row = {key:entry.custom[key].text for key in entry.custom if key is not key_column_name}
            row['rowname'] = entry.title.text
            key_name = entry.custom[key_column_name].text
            if key_name in string_dict:
                multi_rows.append(key_name)
            string_dict[key_name] = row

        if len(multi_rows) > 0:
            errors = 'read_to_dict -- Column {} contains multiple rows with each of the following values: {}'.format(key_column_name,multi_rows)
            # raise GDError('read_to_dict -- Column {} contains multiple rows with each of the following values: {}'.format(key_column_name,multi_rows))
        else:
            errors = None

        return string_dict,errors