Example #1
0
 def download(cls, path, doc_id, logger=None, raise_errors=False):
     """Download the HTML export of a Google Doc.

     Args:
         path: Destination path of the document; only used in log and
             error messages here.
         doc_id: ID of the Google Doc, passed to the Drive `files().get`
             call as `fileId`.
         logger: Object exposing `error()`; defaults to the `logging`
             module.
         raise_errors: When true, a failed download or the absence of an
             HTML export link raises instead of only being logged.

     Returns:
         The body of the HTML export on success. None when the file
         metadata has no `exportLinks`, when the export request does not
         return HTTP 200, or when no HTML export link exists (the latter
         two only if `raise_errors` is false).

     Raises:
         base.PreprocessorError: If `raise_errors` is true and either the
             export request fails or no HTML export link is available.
     """
     logger = logger or logging
     service = BaseGooglePreprocessor.create_service()
     # pylint: disable=no-member
     metadata = service.files().get(fileId=doc_id).execute()
     if 'exportLinks' not in metadata:
         text = 'Unable to export Google Doc: {}'
         logger.error(text.format(path))
         logger.error('Received: {}'.format(metadata))
         return None
     # `items()` behaves the same on Python 2 and 3, unlike `iteritems()`.
     for mimetype, url in metadata['exportLinks'].items():
         if not mimetype.endswith('html'):
             continue
         # Fetch the export URL through the service's own HTTP object.
         export_resp, content = service._http.request(url)
         if export_resp.status != 200:
             text = 'Error {} downloading Google Doc: {}'
             text = text.format(export_resp.status, path)
             if raise_errors:
                 raise base.PreprocessorError(text)
             logger.error(text)
             # Never hand the body of an error response back to the caller
             # as if it were the document's content.
             return None
         return content
     if raise_errors:
         text = 'No file to export from Google Docs: {}'.format(path)
         raise base.PreprocessorError(text)
     return None
Example #2
0
 def run(self, build=True):
     """Launch the configured task in a subprocess.

     Args:
         build: When true, the build task is started and waited on; when
             false, the run task is started and left running.

     Raises:
         base.PreprocessorError: If the build task exits with a non-zero
             status.
     """
     # When the server has been restarted the subprocess must not be
     # spawned a second time, so there is nothing to do.
     if 'RESTARTED' in os.environ:
         return

     if build:
         task_name = self.config.build_task
     else:
         task_name = self.config.run_task
     shell_command = self._get_command(task_name)
     popen_kwargs = sdk_utils.subprocess_args(self.pod, shell=True)

     child = subprocess.Popen(shell_command, **popen_kwargs)
     # Record the child in the module-level list of spawned processes.
     _child_processes.append(child)

     # Only a build is awaited; a non-zero exit status is a failure.
     if build and child.wait() != 0:
         raise base.PreprocessorError(
             'Failed to run: {}'.format(shell_command))
Example #3
0
    def download(cls,
                 spreadsheet_id,
                 gids=None,
                 format_as='list',
                 logger=None,
                 generate_ids=False,
                 header_row_count=1,
                 header_row_index=1):
        """Download tabs of a Google Sheet and parse their rows.

        File metadata (name, modification time, last modifying user and a
        link) is fetched from the Drive API and logged first; the cell
        values are then read tab by tab from the Sheets API.

        Args:
            spreadsheet_id: ID of the spreadsheet to download.
            gids: Sheet (tab) IDs to download. When falsy, every tab of the
                spreadsheet is downloaded.
            format_as: Name of the output format. Names in `cls.MAP_TYPES`
                read only columns A:B and yield a `{key: value}` dict per
                tab (for 'strings', keys without an '@' get one appended).
                Names in `cls.GRID_TYPES` yield a dict keyed by the first
                column whose values are `{header: cell}` dicts with empty
                cells omitted. Any other name yields a list with one
                `{header: cell}` dict per row, missing cells being ''.
            logger: Object exposing `info()`; defaults to the `logging`
                module.
            generate_ids: Map formats only. Rows with an empty key get a
                generated 'untranslated_N' key; the counter is shared
                across all downloaded tabs.
            header_row_count: Number of leading rows of each tab treated as
                header rows and excluded from the data.
            header_row_index: 1-based position, among the header rows, of
                the row that supplies the column names. For list and grid
                formats it must lie within 1..header_row_count, otherwise
                no header row is captured and parsing the first data row
                fails with a TypeError.

        Returns:
            A `(gid_to_sheet, gid_to_data)` tuple. `gid_to_sheet` maps
            every sheet ID of the spreadsheet to its properties;
            `gid_to_data` maps every requested sheet ID to its parsed
            data. Tabs that are skipped or have no values map to an empty
            container.

        Raises:
            base.PreprocessorError: If a grid tab uses the same key twice.
        """
        logger = logger or logging
        # Show metadata about the file to help the user better understand what
        # they are downloading. Also include a link in the output to permit the
        # user to quickly open the file.
        drive_service = BaseGooglePreprocessor.create_service('drive', 'v3')
        # pylint: disable=no-member
        resp = drive_service.files().get(
            fileId=spreadsheet_id,
            fields='name,modifiedTime,lastModifyingUser,webViewLink').execute(
            )
        title = resp['name']
        if isinstance(title, unicode):
            title = title.encode('utf-8')
        if 'lastModifyingUser' in resp:
            # Sometimes the email address isn't included.
            name = resp['lastModifyingUser']['displayName']
            modified_by = resp['lastModifyingUser'].get('emailAddress', name)
            logger.info('Downloading "{}" modified {} by {} from {}'.format(
                title, resp['modifiedTime'], modified_by, resp['webViewLink']))
        else:
            logger.info('Downloading "{}" modified {} from {}'.format(
                title, resp['modifiedTime'], resp['webViewLink']))

        service = BaseGooglePreprocessor.create_service('sheets', 'v4')
        format_as_grid = format_as in cls.GRID_TYPES
        format_as_map = format_as in cls.MAP_TYPES
        # pylint: disable=no-member
        spreadsheet = service.spreadsheets().get(
            spreadsheetId=spreadsheet_id).execute()

        gid_to_sheet = {}
        for sheet in spreadsheet['sheets']:
            gid_to_sheet[sheet['properties']['sheetId']] = sheet['properties']

        if not gids:
            gids = gid_to_sheet.keys()
        if gids and len(gids) > 1:
            url = GoogleSheetsPreprocessor._sheet_edit_url_format.format(
                id=spreadsheet_id)
            logger.info('Downloading {} tabs -> {}'.format(len(gids), url))

        gid_to_data = {}
        # Shared across tabs so generated keys never repeat within one call.
        generated_key_index = 0
        for gid in gids:
            sheet_title = gid_to_sheet[gid]['title']

            # Every requested tab gets an entry in the result, even when it
            # is skipped or turns out to be empty.
            if format_as_map or format_as_grid:
                gid_to_data[gid] = {}
            else:
                gid_to_data[gid] = []

            # Decide whether to skip the tab before requesting its values so
            # that ignored tabs do not cost an API call.
            if sheet_title.startswith(IGNORE_INITIAL):
                logger.info('Skipping tab -> {}'.format(sheet_title))
                continue

            if format_as_map:
                # Maps only use the key and value columns.
                max_column = 'B'
            else:
                max_column = GoogleSheetsPreprocessor.column_to_letter(
                    gid_to_sheet[gid]['gridProperties']['columnCount'])
            range_name = "'{}'!A:{}".format(sheet_title, max_column)

            # pylint: disable=no-member
            resp = service.spreadsheets().values().get(
                spreadsheetId=spreadsheet_id, range=range_name).execute()

            if 'values' not in resp:
                logger.info('No values found in sheet -> {}'.format(
                    sheet_title))
                continue

            headers = None
            header_rows = []
            for row in resp['values']:
                if len(header_rows) < header_row_count:
                    header_rows.append(row)
                    # Only one of the header rows are the actual headers.
                    if len(header_rows) == header_row_index:
                        if format_as_grid:
                            # Ignore first column as a header.
                            headers = row[1:]
                        else:
                            headers = row
                    continue

                if format_as_grid:
                    if not row:  # Skip empty rows.
                        continue
                    key = row[0].strip()
                    if isinstance(key, unicode):
                        key = key.encode('utf-8')
                    if key and key in gid_to_data[gid]:
                        # The key is already in use.
                        raise base.PreprocessorError(
                            'Duplicate key in use in sheet {}: {}'.format(
                                gid, key))
                    if key and not key.startswith(IGNORE_INITIAL):
                        # Grids use the first column as the key and make
                        # object out of the remaining columns.
                        grid_obj = {}
                        row = row[1:]
                        row_len = len(row)
                        for col, grid_key in enumerate(headers):
                            if not grid_key or grid_key.startswith(
                                    IGNORE_INITIAL):
                                continue
                            if isinstance(grid_key, unicode):
                                grid_key = grid_key.encode('utf-8')
                            # Rows may be shorter than the header row.
                            value = (row[col]
                                     if row_len > col else '').strip()
                            if value:
                                grid_obj[grid_key] = value
                        gid_to_data[gid][key] = grid_obj
                elif format_as_map:
                    if not row:  # Skip empty rows.
                        continue
                    key = row[0].strip()
                    if isinstance(key, unicode):
                        key = key.encode('utf-8')
                    if not key and generate_ids:
                        key = 'untranslated_{}'.format(generated_key_index)
                        generated_key_index += 1
                    if key and not key.startswith(IGNORE_INITIAL):
                        if format_as == 'strings' and '@' not in key:
                            key = '{}@'.format(key)
                        gid_to_data[gid][key] = (row[1]
                                                 if len(row) == 2 else '')
                else:
                    row_values = {}
                    for idx, column in enumerate(headers):
                        if not column.startswith(IGNORE_INITIAL):
                            # Rows may be shorter than the header row.
                            row_values[column] = (row[idx] if
                                                  len(row) > idx else '')
                    gid_to_data[gid].append(row_values)
        return gid_to_sheet, gid_to_data