def inject(self, doc):
    spreadsheet_id = self.config.id
    gids = self.config.gids or []
    if self.config.gid is not None:
        gids.append(self.config.gid)
    format_as = self.config.format
    if self.config.collection and format_as not in self.MAP_TYPES:
        format_as = 'map'
    _, gid_to_data = GoogleSheetsPreprocessor.download(
        spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
        logger=self.pod.logger, generate_ids=self.config.generate_ids)
    if self.config.path:
        if format_as in ['list']:
            self.pod.logger.info(
                'Cannot inject list formatted spreadsheet -> {}'.format(
                    self.config.path))
            return
        # Single sheet import.
        path, key_to_update = self.parse_path(self.config.path)
        for gid in gids:
            # Preserve existing yaml data.
            if (path.endswith(('.yaml', '.yml')) and self.config.preserve
                    and self.pod.file_exists(path)):
                existing_data = self.pod.read_yaml(path)
                gid_to_data[gid] = utils.format_existing_data(
                    old_data=existing_data, new_data=gid_to_data[gid],
                    preserve=self.config.preserve,
                    key_to_update=key_to_update)
            gid_to_data[gid] = document_fields.DocumentFields.untag(
                gid_to_data[gid])
            doc.inject(fields=gid_to_data[gid])
    else:
        # TODO: Multi sheet import.
        pass
def format_content(cls, content, path, format_as=None, preserve=None, existing_data=None, key_to_update=None): """Formats content into either a CSV (text), list, or dictionary.""" convert_to = cls.get_convert_to(path) if convert_to in ['.json', '.yaml', '.yml']: fp = cStringIO.StringIO() fp.write(content) fp.seek(0) if format_as == 'map': formatted_data = GoogleSheetsPreprocessor.format_as_map(fp) else: reader = csv.DictReader(fp) formatted_data = list(reader) formatted_data = utils.format_existing_data( old_data=existing_data, new_data=formatted_data, preserve=preserve, key_to_update=key_to_update) return formatted_data return content
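# Illustrative only: a minimal, self-contained sketch of the two shapes that
# format_content produces from CSV text. format_as_map is not shown in this
# section, so the key/value reduction below is an assumption about the 'map'
# format; the 'list' branch mirrors the csv.DictReader call above.
import csv
import io

_SAMPLE_CSV = 'key,value\ntitle,Hello\nsubtitle,World\n'

def _as_list(text):
    # 'list' format: one dict per spreadsheet row, keyed by the header row.
    return list(csv.DictReader(io.StringIO(text)))

def _as_map(text):
    # Assumed 'map' format: collapse the key/value columns into one dict.
    return {row['key']: row['value']
            for row in csv.DictReader(io.StringIO(text))}

# _as_list(_SAMPLE_CSV) -> [{'key': 'title', 'value': 'Hello'}, ...]
# _as_map(_SAMPLE_CSV)  -> {'title': 'Hello', 'subtitle': 'World'}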
def _maybe_preserve_content(self, new_data, path, key_to_update):
    if path.endswith(('.yaml', '.yml')) and self.config.preserve:
        # Use the existing data if it exists. If we're updating data at a
        # specific key and the file doesn't exist yet, start from an empty
        # dict. If the file doesn't exist and we're not updating at a
        # specific key, return the new data unchanged.
        if self.pod.file_exists(path):
            # Parse the YAML as plain text to avoid triggering the custom
            # constructors.
            content = self.pod.read_file(path)
            existing_data = utils.load_plain_yaml(content)
        elif key_to_update:
            existing_data = {}
        else:
            return new_data
        # Skip updating lists; there is no expectation of merging old and
        # new list data.
        if not key_to_update and not isinstance(new_data, dict):
            return new_data
        if isinstance(existing_data, dict):
            return utils.format_existing_data(
                old_data=existing_data, new_data=new_data,
                preserve=self.config.preserve, key_to_update=key_to_update)
    return new_data
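# Illustrative only: a simplified stand-in for utils.format_existing_data as
# it is called throughout this section. The real helper's preserve semantics
# are richer; this sketch assumes preserve means "keep existing keys that the
# new data does not overwrite" and that key_to_update nests the new data
# under a single key of the existing document.
import copy

def format_existing_data_sketch(old_data, new_data, preserve=None,
                                key_to_update=None):
    if not isinstance(old_data, dict):
        return new_data
    merged = copy.deepcopy(old_data)
    if key_to_update:
        # Only replace the data stored under one key; leave siblings alone.
        merged[key_to_update] = new_data
        return merged
    if preserve:
        # New values win, but keys absent from new_data survive.
        merged.update(new_data)
        return merged
    return new_data

# format_existing_data_sketch({'$title': 'Kept', 'body': 'old'},
#                             {'body': 'new'}, preserve='builtins')
# -> {'$title': 'Kept', 'body': 'new'}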
def inject(self, doc):
    spreadsheet_id = self.config.id
    gids = self.config.gids or []
    if self.config.gid is not None:
        gids.append(self.config.gid)
    format_as = self.config.format
    if self.config.collection and format_as not in self.MAP_TYPES:
        format_as = 'map'
    _, gid_to_data = GoogleSheetsPreprocessor.download(
        spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
        logger=self.pod.logger, generate_ids=self.config.generate_ids)
    if self.config.path:
        if format_as in ['list']:
            self.pod.logger.info(
                'Cannot inject list formatted spreadsheet -> {}'.format(
                    self.config.path))
            return
        # Single sheet import.
        path, key_to_update = self.parse_path(self.config.path)
        for gid in gids:
            # Preserve existing yaml data.
            if (path.endswith(('.yaml', '.yml')) and self.config.preserve
                    and self.pod.file_exists(path)):
                existing_data = self.pod.read_yaml(path)
                gid_to_data[gid] = utils.format_existing_data(
                    old_data=existing_data, new_data=gid_to_data[gid],
                    preserve=self.config.preserve,
                    key_to_update=key_to_update)
            gid_to_data[gid] = untag.Untag.untag(gid_to_data[gid])
            doc.inject(fields=gid_to_data[gid])
    else:
        # TODO: Multi sheet import.
        pass
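# Illustrative only: one plausible reading of self.parse_path as used above,
# where the configured path may carry a trailing key selector so that only
# part of an existing YAML file is updated. The ':' separator and the
# key_to_update semantics are assumptions; the real parse_path lives on the
# preprocessor base class.
def parse_path_sketch(pod_path):
    if ':' in pod_path:
        path, key_to_update = pod_path.rsplit(':', 1)
        return path, key_to_update
    return pod_path, None

# parse_path_sketch('/content/partials/home.yaml:hero')
#   -> ('/content/partials/home.yaml', 'hero')
# parse_path_sketch('/content/partials/home.yaml')
#   -> ('/content/partials/home.yaml', None)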
def execute(self, config):
    spreadsheet_id = config.id
    gids = config.gids or []
    if config.gid is not None:
        gids.append(config.gid)
    if not gids and not config.collection:
        gids.append(0)
    format_as = config.format
    if (config.collection
            and format_as not in GoogleSheetsPreprocessor.MAP_TYPES
            and format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
        format_as = 'map'
    gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
        spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
        logger=self.pod.logger, generate_ids=config.generate_ids)
    if config.path:
        # Single sheet import.
        path, key_to_update = self.parse_path(config.path)
        for gid in gids:
            # Preserve existing yaml data.
            if (path.endswith(('.yaml', '.yml')) and self.config.preserve
                    and self.pod.file_exists(path)):
                existing_data = self.pod.read_yaml(path)
                # Skip trying to update lists, because there would be no
                # expectation of merging old and new list data.
                if isinstance(existing_data, dict):
                    gid_to_data[gid] = utils.format_existing_data(
                        old_data=existing_data, new_data=gid_to_data[gid],
                        preserve=self.config.preserve,
                        key_to_update=key_to_update)
            content = GoogleSheetsPreprocessor.serialize_content(
                formatted_data=gid_to_data[gid], path=path,
                output_style=self.config.output_style)
            self.pod.write_file(path, content)
            self.logger.info(
                'Downloaded {} ({}) -> {}'.format(
                    gid_to_sheet[gid]['title'], gid, path))
    else:
        # Multi sheet import.
        collection_path = config.collection
        if not gids:
            gids = gid_to_sheet.keys()
        for gid in gids:
            if gid_to_sheet[gid]['title'].strip().startswith(IGNORE_INITIAL):
                continue
            file_name = '{}.yaml'.format(
                utils.slugify(gid_to_sheet[gid]['title']))
            output_path = os.path.join(collection_path, file_name)
            self.pod.write_yaml(output_path, gid_to_data[gid])
            self.logger.info(
                'Downloaded {} ({}) -> {}'.format(
                    gid_to_sheet[gid]['title'], gid, output_path))
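# Illustrative only: how the multi-sheet branch of execute derives an output
# path per tab. IGNORE_INITIAL_SKETCH and slugify_sketch are assumptions
# standing in for the module-level IGNORE_INITIAL constant and utils.slugify
# used above.
import os
import re

IGNORE_INITIAL_SKETCH = '_'  # assumed sentinel; tabs starting with it are skipped

def slugify_sketch(title):
    # Simplified stand-in for utils.slugify: lowercase, dashes elsewhere.
    return re.sub(r'[^a-z0-9]+', '-', title.strip().lower()).strip('-')

def output_paths_sketch(collection_path, gid_to_sheet):
    paths = {}
    for gid, sheet in gid_to_sheet.items():
        title = sheet['title']
        if title.strip().startswith(IGNORE_INITIAL_SKETCH):
            continue  # explicitly ignored tab
        file_name = '{}.yaml'.format(slugify_sketch(title))
        paths[gid] = os.path.join(collection_path, file_name)
    return paths

# output_paths_sketch('/content/sheets/',
#                     {1: {'title': 'Team Members'}, 2: {'title': '_Scratch'}})
# -> {1: '/content/sheets/team-members.yaml'}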
def format_content(cls, content, path, format_as=None, preserve=None, existing_data=None, key_to_update=None): """Formats content into either a CSV (text), list, or dictionary.""" convert_to = cls.get_convert_to(path) if convert_to in [".json", ".yaml", ".yml"]: fp = cStringIO.StringIO() fp.write(content) fp.seek(0) if format_as == "map": formatted_data = GoogleSheetsPreprocessor.format_as_map(fp) else: reader = csv.DictReader(fp) formatted_data = list(reader) formatted_data = utils.format_existing_data( old_data=existing_data, new_data=formatted_data, preserve=preserve, key_to_update=key_to_update ) return formatted_data return content
def _maybe_preserve_content(self, new_data, path, key_to_update, properties):
    # Include meta properties from the Google Sheet.
    if self.config.include_properties:
        if META_KEY not in new_data:
            new_data[META_KEY] = {}
        if 'properties' not in new_data[META_KEY]:
            new_data[META_KEY]['properties'] = {}
        for name in self.config.include_properties:
            if name in properties:
                new_data[META_KEY]['properties'][name] = properties[name]
    # Tabs colored red are marked as drafts.
    if (isinstance(new_data, dict)
            and self.config.color_as_draft
            and properties.get('tabColor')):
        if properties['tabColor'] == {'red': 1}:
            new_data[DRAFT_KEY] = True
    if path.endswith(('.yaml', '.yml')) and self.config.preserve:
        # Use the existing data if it exists. If we're updating data at a
        # specific key and the file doesn't exist yet, start from an empty
        # dict. If the file doesn't exist and we're not updating at a
        # specific key, return the new data unchanged.
        if self.pod.file_exists(path):
            # Parse the YAML as plain text to avoid triggering the custom
            # constructors.
            content = self.pod.read_file(path)
            existing_data = utils.load_plain_yaml(content)
        elif key_to_update:
            existing_data = {}
        else:
            return new_data
        # Skip updating lists; there is no expectation of merging old and
        # new list data.
        if not key_to_update and not isinstance(new_data, dict):
            return new_data
        if isinstance(existing_data, dict):
            return utils.format_existing_data(
                old_data=existing_data, new_data=new_data,
                preserve=self.config.preserve, key_to_update=key_to_update)
    return new_data
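# Illustrative only: the include_properties and tabColor behaviors from
# _maybe_preserve_content above, with assumed values for META_KEY and
# DRAFT_KEY (the real constants come from the surrounding module).
META_KEY_SKETCH = '$meta'    # assumed
DRAFT_KEY_SKETCH = '$draft'  # assumed

def annotate_sketch(new_data, properties, include_properties=None,
                    color_as_draft=True):
    if include_properties:
        meta = new_data.setdefault(META_KEY_SKETCH, {})
        props = meta.setdefault('properties', {})
        for name in include_properties:
            if name in properties:
                props[name] = properties[name]
    # Only a tab colored pure red ({'red': 1}) is treated as a draft.
    if color_as_draft and properties.get('tabColor') == {'red': 1}:
        new_data[DRAFT_KEY_SKETCH] = True
    return new_data

# annotate_sketch({}, {'tabColor': {'red': 1}, 'title': 'Roadmap'},
#                 include_properties=['title'])
# -> {'$meta': {'properties': {'title': 'Roadmap'}}, '$draft': True}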
def inject(self, doc):
    path = doc.pod_path
    post_id = doc.base
    try:
        item = BloggerPreprocessor.download_item(
            blog_id=self.config.blog_id, post_id=post_id,
            authenticated=self.config.authenticated)
    except (errors.HttpError, base.PreprocessorError):
        text = 'Error downloading Blogger post -> {}'.format(path)
        raise base.PreprocessorError(text)
    if not item:
        return
    fields, body, _ = self._parse_item(item)
    if doc.exists:
        existing_data = doc.get_tagged_fields()
    else:
        existing_data = {}
    fields = utils.format_existing_data(
        old_data=existing_data, new_data=fields,
        preserve=self.config.preserve)
    doc.inject(fields=fields, body=body)
def inject(self, doc):
    path = doc.pod_path
    post_id = doc.base
    try:
        item = BloggerPreprocessor.download_item(
            blog_id=self.config.blog_id, post_id=post_id,
            authenticated=self.config.authenticated)
    except (errors.HttpError, base.PreprocessorError):
        text = 'Error downloading Blogger post -> {}'.format(path)
        raise base.PreprocessorError(text)
    if not item:
        return
    fields, body, _ = self._parse_item(item)
    if doc.exists:
        existing_data = doc.format.front_matter.data
    else:
        existing_data = {}
    fields = utils.format_existing_data(
        old_data=existing_data, new_data=fields,
        preserve=self.config.preserve)
    doc.inject(fields=fields, body=body)
    return self
def _maybe_preserve_content(self, new_data, path, key_to_update, properties):
    # Include meta properties from the Google Sheet.
    if self.config.include_properties:
        if META_KEY not in new_data:
            new_data[META_KEY] = {}
        if 'properties' not in new_data[META_KEY]:
            new_data[META_KEY]['properties'] = {}
        for name in self.config.include_properties:
            if name in properties:
                new_data[META_KEY]['properties'][name] = properties[name]
    # Tabs colored red are marked as drafts.
    if self.config.color_as_draft and properties.get('tabColor'):
        if properties['tabColor'] == {'red': 1}:
            new_data[DRAFT_KEY] = True
    if path.endswith(('.yaml', '.yml')) and self.config.preserve:
        # Use the existing data if it exists. If we're updating data at a
        # specific key and the file doesn't exist yet, start from an empty
        # dict. If the file doesn't exist and we're not updating at a
        # specific key, return the new data unchanged.
        if self.pod.file_exists(path):
            # Parse the YAML as plain text to avoid triggering the custom
            # constructors.
            content = self.pod.read_file(path)
            existing_data = utils.load_plain_yaml(content)
        elif key_to_update:
            existing_data = {}
        else:
            return new_data
        # Skip updating lists; there is no expectation of merging old and
        # new list data.
        if not key_to_update and not isinstance(new_data, dict):
            return new_data
        if isinstance(existing_data, dict):
            return utils.format_existing_data(
                old_data=existing_data, new_data=new_data,
                preserve=self.config.preserve, key_to_update=key_to_update)
    return new_data