def download(cls, path, doc_id, logger=None, raise_errors=False):
    logger = logger or logging
    service = BaseGooglePreprocessor.create_service()
    # pylint: disable=no-member
    resp = service.files().get(fileId=doc_id).execute()
    if 'exportLinks' not in resp:
        text = 'Unable to export Google Doc: {}'
        logger.error(text.format(path))
        logger.error('Received: {}'.format(resp))
        return
    for mimetype, url in resp['exportLinks'].iteritems():
        if not mimetype.endswith('html'):
            continue
        resp, content = service._http.request(url)
        if resp.status != 200:
            text = 'Error {} downloading Google Doc: {}'
            text = text.format(resp.status, path)
            if raise_errors:
                raise base.PreprocessorError(text)
            logger.error(text)
            return
        return content
    if raise_errors:
        text = 'No file to export from Google Docs: {}'.format(path)
        raise base.PreprocessorError(text)
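# Hedged usage sketch: GoogleDocsPreprocessor is an assumed class name for the
# owner of download() above, and the path/doc_id values are placeholders.
content = GoogleDocsPreprocessor.download(
    path='/content/pages/about.html',
    doc_id='1a2b3c4d',
    raise_errors=True)
# With raise_errors=True a failed export raises base.PreprocessorError;
# otherwise download() logs the problem and returns None instead of HTML.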
def run(self, build=True):
    # Avoid restarting the Gulp subprocess if the preprocessor is
    # being run as a result of restarting the server.
    if 'RESTARTED' in os.environ:
        return
    task = self.config.build_task if build else self.config.run_task
    command = self._get_command(task)
    args = sdk_utils.subprocess_args(self.pod, shell=True)
    process = subprocess.Popen(command, **args)
    _child_processes.append(process)
    if not build:
        return
    code = process.wait()
    if code != 0:
        text = 'Failed to run: {}'.format(command)
        raise base.PreprocessorError(text)
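# Minimal standalone sketch (stdlib only, hypothetical names) of the pattern
# used by run() above: launch the task as a shell child process, keep a handle
# so it can be cleaned up later, and only block on completion for one-shot
# builds; long-running watch/serve tasks stay running in the background.
import subprocess

_children = []

def run_task(command, wait=True):
    process = subprocess.Popen(command, shell=True)
    _children.append(process)
    if not wait:
        return
    if process.wait() != 0:
        raise RuntimeError('Failed to run: {}'.format(command))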
def download(cls, spreadsheet_id, gids=None, format_as='list', logger=None,
             generate_ids=False, header_row_count=1, header_row_index=1):
    logger = logger or logging
    # Show metadata about the file to help the user better understand what
    # they are downloading. Also include a link in the output to permit the
    # user to quickly open the file.
    drive_service = BaseGooglePreprocessor.create_service('drive', 'v3')
    # pylint: disable=no-member
    resp = drive_service.files().get(
        fileId=spreadsheet_id,
        fields='name,modifiedTime,lastModifyingUser,webViewLink').execute()
    title = resp['name']
    if isinstance(title, unicode):
        title = title.encode('utf-8')
    if 'lastModifyingUser' in resp:
        # Sometimes the email address isn't included.
        name = resp['lastModifyingUser']['displayName']
        modified_by = resp['lastModifyingUser'].get('emailAddress', name)
        logger.info('Downloading "{}" modified {} by {} from {}'.format(
            title, resp['modifiedTime'], modified_by, resp['webViewLink']))
    else:
        logger.info('Downloading "{}" modified {} from {}'.format(
            title, resp['modifiedTime'], resp['webViewLink']))
    service = BaseGooglePreprocessor.create_service('sheets', 'v4')
    format_as_grid = format_as in cls.GRID_TYPES
    format_as_map = format_as in cls.MAP_TYPES
    # pylint: disable=no-member
    spreadsheet = service.spreadsheets().get(
        spreadsheetId=spreadsheet_id).execute()
    gid_to_sheet = {}
    for sheet in spreadsheet['sheets']:
        gid_to_sheet[sheet['properties']['sheetId']] = sheet['properties']
    if not gids:
        gids = gid_to_sheet.keys()
    if gids and len(gids) > 1:
        url = GoogleSheetsPreprocessor._sheet_edit_url_format.format(
            id=spreadsheet_id)
        logger.info('Downloading {} tabs -> {}'.format(len(gids), url))
    gid_to_data = {}
    generated_key_index = 0
    for gid in gids:
        if format_as_map:
            max_column = 'B'
        else:
            max_column = GoogleSheetsPreprocessor.column_to_letter(
                gid_to_sheet[gid]['gridProperties']['columnCount'])
        range_name = "'{}'!A:{}".format(gid_to_sheet[gid]['title'], max_column)
        # pylint: disable=no-member
        resp = service.spreadsheets().values().get(
            spreadsheetId=spreadsheet_id, range=range_name).execute()
        if format_as_map or format_as_grid:
            gid_to_data[gid] = {}
        else:
            gid_to_data[gid] = []
        if 'values' not in resp:
            logger.info('No values found in sheet -> {}'.format(
                gid_to_sheet[gid]['title']))
        else:
            title = gid_to_sheet[gid]['title']
            if title.startswith(IGNORE_INITIAL):
                logger.info('Skipping tab -> {}'.format(title))
                continue
            headers = None
            header_rows = []
            for row in resp['values']:
                if len(header_rows) < header_row_count:
                    header_rows.append(row)
                    # Only one of the header rows is the actual header row.
                    if len(header_rows) == header_row_index:
                        if format_as_grid:
                            # Ignore first column as a header.
                            headers = row[1:]
                        else:
                            headers = row
                    continue
                if format_as_grid:
                    if not row:
                        # Skip empty rows.
                        continue
                    key = row[0].strip()
                    if isinstance(key, unicode):
                        key = key.encode('utf-8')
                    if key and key in gid_to_data[gid]:
                        # The key is already in use.
                        raise base.PreprocessorError(
                            'Duplicate key in use in sheet {}: {}'.format(
                                gid, key))
                    if key and not key.startswith(IGNORE_INITIAL):
                        # Grids use the first column as the key and make an
                        # object out of the remaining columns.
                        grid_obj = {}
                        row = row[1:]
                        row_len = len(row)
                        for col, grid_key in enumerate(headers):
                            if not grid_key or grid_key.startswith(
                                    IGNORE_INITIAL):
                                continue
                            if isinstance(grid_key, unicode):
                                grid_key = grid_key.encode('utf-8')
                            value = (row[col] if row_len > col else '').strip()
                            if value:
                                grid_obj[grid_key] = value
                        gid_to_data[gid][key] = grid_obj
                elif format_as_map:
                    if not row:
                        # Skip empty rows.
                        continue
                    key = row[0].strip()
                    if isinstance(key, unicode):
                        key = key.encode('utf-8')
                    if not key and generate_ids:
                        key = 'untranslated_{}'.format(generated_key_index)
                        generated_key_index += 1
                    if key and not key.startswith(IGNORE_INITIAL):
                        if format_as == 'strings' and '@' not in key:
                            key = '{}@'.format(key)
                        gid_to_data[gid][key] = (
                            row[1] if len(row) == 2 else '')
                else:
                    row_values = {}
                    for idx, column in enumerate(headers):
                        if not column.startswith(IGNORE_INITIAL):
                            row_values[column] = (
                                row[idx] if len(row) > idx else '')
                    gid_to_data[gid].append(row_values)
    return gid_to_sheet, gid_to_data
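# Hedged standalone sketch of the 'grid' keying convention implemented above:
# the first cell of each row becomes the key and the remaining cells become a
# dict keyed by the header row (minus its first column). Names are made up and
# the ignore-prefix / duplicate-key handling is omitted for brevity.
def rows_to_grid(header_row, data_rows):
    headers = header_row[1:]
    grid = {}
    for row in data_rows:
        if not row:
            continue
        key = row[0].strip()
        if not key:
            continue
        values = row[1:]
        grid[key] = {
            header: values[col].strip()
            for col, header in enumerate(headers)
            if header and col < len(values) and values[col].strip()
        }
    return grid

# Example: rows_to_grid(['id', 'en', 'es'], [['hello', 'Hello', 'Hola']])
# -> {'hello': {'en': 'Hello', 'es': 'Hola'}}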