Exemple #1
0
    def inject(self, doc):
        """Download the configured sheets and inject their data into a doc.

        Sheet ids are taken from `self.config.gids` plus the optional
        `self.config.gid`. Data is only injected when `self.config.path` is
        set; list-formatted data is skipped with a log message.

        Args:
            doc: Object whose `inject(fields=...)` receives the untagged data
                of each downloaded sheet.
        """
        spreadsheet_id = self.config.id
        # Work on a copy: appending to the configured list itself would add
        # `self.config.gid` to `self.config.gids` again on every call.
        gids = list(self.config.gids or [])
        if self.config.gid is not None:
            gids.append(self.config.gid)
        format_as = self.config.format
        # A collection is downloaded as a map unless a map type was chosen.
        if self.config.collection and format_as not in self.MAP_TYPES:
            format_as = 'map'
        _, gid_to_data = GoogleSheetsPreprocessor.download(
            spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
            logger=self.pod.logger, generate_ids=self.config.generate_ids)

        if self.config.path:
            if format_as in ['list']:
                self.pod.logger.info(
                    'Cannot inject list formatted spreadsheet -> {}'.format(self.config.path))
                return
            # Single sheet import.
            path, key_to_update = self.parse_path(self.config.path)

            for gid in gids:
                # Preserve existing yaml data.
                if (path.endswith(('.yaml', '.yml'))
                        and self.config.preserve and self.pod.file_exists(path)):
                    existing_data = self.pod.read_yaml(path)
                    gid_to_data[gid] = utils.format_existing_data(
                        old_data=existing_data, new_data=gid_to_data[gid],
                        preserve=self.config.preserve, key_to_update=key_to_update)

                gid_to_data[gid] = document_fields.DocumentFields.untag(
                    gid_to_data[gid])
                doc.inject(fields=gid_to_data[gid])
        else:
            # TODO Multi sheet import.
            pass
Exemple #2
0
 def format_content(cls,
                    content,
                    path,
                    format_as=None,
                    preserve=None,
                    existing_data=None,
                    key_to_update=None):
     """Turn downloaded sheet text into data suited to the target path.

     When `cls.get_convert_to(path)` is not `.json`, `.yaml` or `.yml` the
     content is handed back untouched. Otherwise the text is parsed, either
     with `GoogleSheetsPreprocessor.format_as_map` (`format_as == 'map'`) or
     into a list of `csv.DictReader` rows, and the parsed data is passed
     through `utils.format_existing_data` together with `existing_data`,
     `preserve` and `key_to_update`.
     """
     if cls.get_convert_to(path) not in ('.json', '.yaml', '.yml'):
         return content

     # Both parsers read from a file-like object, so buffer the text first.
     buf = cStringIO.StringIO()
     buf.write(content)
     buf.seek(0)

     if format_as == 'map':
         parsed = GoogleSheetsPreprocessor.format_as_map(buf)
     else:
         parsed = list(csv.DictReader(buf))

     return utils.format_existing_data(
         old_data=existing_data,
         new_data=parsed,
         preserve=preserve,
         key_to_update=key_to_update)
Exemple #3
0
 def _maybe_preserve_content(self, new_data, path, key_to_update):
     """Return `new_data`, combined with the existing yaml file if configured.

     The data is passed through `utils.format_existing_data` only when the
     path is a yaml file, `self.config.preserve` is set, and the existing
     data is a dict. In every other case `new_data` is returned as given.
     """
     if not (path.endswith(('.yaml', '.yml')) and self.config.preserve):
         return new_data

     if self.pod.file_exists(path):
         # Do a text parse of the yaml file to prevent the constructors.
         raw_yaml = self.pod.read_file(path)
         previous = utils.load_plain_yaml(raw_yaml)
     elif key_to_update:
         # No file yet, but a specific key is being updated: start empty.
         previous = {}
     else:
         # No file and no key: nothing to preserve.
         return new_data

     # Skip trying to update lists, because there would be no expectation
     # of merging old and new list data.
     if not (key_to_update or isinstance(new_data, dict)):
         return new_data
     if not isinstance(previous, dict):
         return new_data

     return utils.format_existing_data(
         old_data=previous,
         new_data=new_data,
         preserve=self.config.preserve,
         key_to_update=key_to_update)
Exemple #4
0
    def inject(self, doc):
        """Download the configured sheets and inject their data into a doc.

        Sheet ids are taken from `self.config.gids` plus the optional
        `self.config.gid`. Data is only injected when `self.config.path` is
        set; list-formatted data is skipped with a log message.

        Args:
            doc: Object whose `inject(fields=...)` receives the untagged data
                of each downloaded sheet.
        """
        spreadsheet_id = self.config.id
        # Copy the configured list before appending; otherwise every call
        # would append `self.config.gid` to `self.config.gids` itself.
        gids = list(self.config.gids or [])
        if self.config.gid is not None:
            gids.append(self.config.gid)
        format_as = self.config.format
        # A collection is downloaded as a map unless a map type was chosen.
        if self.config.collection and format_as not in self.MAP_TYPES:
            format_as = 'map'
        _, gid_to_data = GoogleSheetsPreprocessor.download(
            spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
            logger=self.pod.logger, generate_ids=self.config.generate_ids)

        if self.config.path:
            if format_as in ['list']:
                self.pod.logger.info(
                    'Cannot inject list formatted spreadsheet -> {}'.format(self.config.path))
                return
            # Single sheet import.
            path, key_to_update = self.parse_path(self.config.path)

            for gid in gids:
                # Preserve existing yaml data.
                if (path.endswith(('.yaml', '.yml'))
                        and self.config.preserve and self.pod.file_exists(path)):
                    existing_data = self.pod.read_yaml(path)
                    gid_to_data[gid] = utils.format_existing_data(
                        old_data=existing_data, new_data=gid_to_data[gid],
                        preserve=self.config.preserve, key_to_update=key_to_update)

                gid_to_data[gid] = untag.Untag.untag(gid_to_data[gid])
                doc.inject(fields=gid_to_data[gid])
        else:
            # TODO Multi sheet import.
            pass
Exemple #5
0
    def execute(self, config):
        """Download the configured sheets and write them into the pod.

        With `config.path` set, every requested sheet is serialized and
        written to that one path. Otherwise each sheet is written as a yaml
        file inside `config.collection`, named after the slugified sheet
        title; sheets whose stripped title starts with `IGNORE_INITIAL` are
        skipped.

        Args:
            config: Preprocessor configuration providing `id`, `gids`, `gid`,
                `collection`, `format`, `generate_ids` and `path`.
        """
        spreadsheet_id = config.id
        # Copy the configured list: the appends below would otherwise
        # modify `config.gids` in place and accumulate across calls.
        gids = list(config.gids or [])
        if config.gid is not None:
            gids.append(config.gid)
        # Without any gid and without a collection, fall back to gid 0.
        if not gids and not config.collection:
            gids.append(0)
        format_as = config.format
        if (config.collection and
                format_as not in GoogleSheetsPreprocessor.MAP_TYPES and
                format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
            format_as = 'map'
        gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
            spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
            logger=self.pod.logger, generate_ids=config.generate_ids)

        if config.path:
            # Single sheet import.
            path, key_to_update = self.parse_path(config.path)

            for gid in gids:
                # Preserve existing yaml data.
                # NOTE(review): `preserve` and `output_style` are read from
                # `self.config` while everything else uses the `config`
                # argument -- confirm both always refer to the same object.
                if (path.endswith(('.yaml', '.yml'))
                        and self.config.preserve and self.pod.file_exists(path)):
                    existing_data = self.pod.read_yaml(path)
                    # Skip trying to update lists, because there would be no
                    # expectation of merging old and new list data.
                    if isinstance(existing_data, dict):
                        gid_to_data[gid] = utils.format_existing_data(
                            old_data=existing_data, new_data=gid_to_data[gid],
                            preserve=self.config.preserve, key_to_update=key_to_update)

                content = GoogleSheetsPreprocessor.serialize_content(
                    formatted_data=gid_to_data[gid], path=path,
                    output_style=self.config.output_style)

                self.pod.write_file(path, content)
                self.logger.info(
                    'Downloaded {} ({}) -> {}'.format(
                        gid_to_sheet[gid]['title'], gid, path))
        else:
            # Multi sheet import.
            collection_path = config.collection

            # No explicit gids: import every sheet that was downloaded.
            if not gids:
                gids = gid_to_sheet.keys()

            for gid in gids:
                if gid_to_sheet[gid]['title'].strip().startswith(IGNORE_INITIAL):
                    continue
                file_name = '{}.yaml'.format(
                    utils.slugify(gid_to_sheet[gid]['title']))
                output_path = os.path.join(collection_path, file_name)
                self.pod.write_yaml(output_path, gid_to_data[gid])
                self.logger.info(
                    'Downloaded {} ({}) -> {}'.format(
                        gid_to_sheet[gid]['title'], gid, output_path))
Exemple #6
0
 def format_content(cls, content, path, format_as=None, preserve=None, existing_data=None, key_to_update=None):
     """Parse sheet text for structured targets; pass other text through.

     Paths for which `cls.get_convert_to` yields ".json", ".yaml" or ".yml"
     get parsed data (a map when `format_as` is "map", otherwise a list of
     `csv.DictReader` rows) run through `utils.format_existing_data`. For
     any other path the original `content` is returned.
     """
     target_ext = cls.get_convert_to(path)
     wants_structured = target_ext in [".json", ".yaml", ".yml"]
     if not wants_structured:
         return content

     # The parsers below consume a file-like object rather than a string.
     stream = cStringIO.StringIO()
     stream.write(content)
     stream.seek(0)

     as_map = format_as == "map"
     if as_map:
         new_data = GoogleSheetsPreprocessor.format_as_map(stream)
     else:
         rows = csv.DictReader(stream)
         new_data = list(rows)

     return utils.format_existing_data(
         old_data=existing_data, new_data=new_data, preserve=preserve, key_to_update=key_to_update
     )
Exemple #7
0
 def _maybe_preserve_content(self, new_data, path, key_to_update,
                             properties):
     """Annotate `new_data` from sheet properties and preserve yaml content.

     Dict data can receive the configured sheet properties under
     `META_KEY` and the draft flag under `DRAFT_KEY`; data of any other
     type skips both annotations. When the path is a yaml file and
     `self.config.preserve` is set, the data is then passed through
     `utils.format_existing_data` together with the existing file content.

     Args:
         new_data: Data downloaded for the sheet.
         path: Pod path the data will be written to.
         key_to_update: Optional key passed on to
             `utils.format_existing_data`.
         properties: Mapping of sheet properties; `tabColor` and the names
             in `self.config.include_properties` are read from it.

     Returns:
         The data to write: `new_data` (possibly annotated in place) or the
         result of `utils.format_existing_data`.
     """
     # Only dicts can take the string keys assigned below; without this
     # check list data raises TypeError on the key assignment.
     is_dict = isinstance(new_data, dict)
     # Includes meta properties from the Google Sheet.
     if is_dict and self.config.include_properties:
         if META_KEY not in new_data:
             new_data[META_KEY] = {}
         if 'properties' not in new_data[META_KEY]:
             new_data[META_KEY]['properties'] = {}
         for name in self.config.include_properties:
             if name in properties:
                 new_data[META_KEY]['properties'][name] = properties[name]
     # Tabs colored red are marked draft.
     if is_dict \
             and self.config.color_as_draft \
             and properties.get('tabColor'):
         if properties['tabColor'] == {'red': 1}:
             new_data[DRAFT_KEY] = True
     if path.endswith(('.yaml', '.yml')) and self.config.preserve:
         # Use existing data if it exists. If we're updating data at a
         # specific key, and if the existing data doesn't exist, use an
         # empty dict. If the file doesn't exist and if we're not updating
         # at a specific key, just return the new data without reformatting.
         if self.pod.file_exists(path):
             # Do a text parse of the yaml file to prevent the constructors.
             content = self.pod.read_file(path)
             existing_data = utils.load_plain_yaml(content)
         elif key_to_update:
             existing_data = {}
         else:
             return new_data
         # Skip trying to update lists, because there would be no
         # expectation of merging old and new list data.
         if not key_to_update and not isinstance(new_data, dict):
             return new_data
         if isinstance(existing_data, dict):
             return utils.format_existing_data(
                 old_data=existing_data,
                 new_data=new_data,
                 preserve=self.config.preserve,
                 key_to_update=key_to_update)
     return new_data
Exemple #8
0
 def inject(self, doc):
     """Download the Blogger post for `doc` and inject it into the doc.

     The post is looked up by `doc.base`. Download errors are re-raised as
     `base.PreprocessorError`; if nothing is downloaded the doc is left
     untouched.
     """
     pod_path = doc.pod_path
     blogger_post_id = doc.base
     try:
         post = BloggerPreprocessor.download_item(
             blog_id=self.config.blog_id, post_id=blogger_post_id,
             authenticated=self.config.authenticated)
     except (errors.HttpError, base.PreprocessorError):
         raise base.PreprocessorError(
             'Error downloading Blogger post -> {}'.format(pod_path))
     if not post:
         return
     parsed_fields, parsed_body, _ = self._parse_item(post)
     # Only read the doc's current fields when the doc already exists.
     previous_fields = doc.get_tagged_fields() if doc.exists else {}
     formatted_fields = utils.format_existing_data(
         old_data=previous_fields, new_data=parsed_fields,
         preserve=self.config.preserve)
     doc.inject(fields=formatted_fields, body=parsed_body)
Exemple #9
0
 def inject(self, doc):
     """Inject the Blogger post identified by `doc.base` into `doc`.

     Raises `base.PreprocessorError` when the download fails. Returns
     `self` after injecting, or `None` when the download yields nothing.
     """
     doc_path = doc.pod_path
     remote_id = doc.base
     try:
         downloaded = BloggerPreprocessor.download_item(
             blog_id=self.config.blog_id,
             post_id=remote_id,
             authenticated=self.config.authenticated)
     except (errors.HttpError, base.PreprocessorError):
         message = 'Error downloading Blogger post -> {}'.format(doc_path)
         raise base.PreprocessorError(message)
     if not downloaded:
         return
     new_fields, new_body, _ = self._parse_item(downloaded)
     # The front matter is only read for docs that already exist.
     current_fields = doc.format.front_matter.data if doc.exists else {}
     doc.inject(
         fields=utils.format_existing_data(
             old_data=current_fields,
             new_data=new_fields,
             preserve=self.config.preserve),
         body=new_body)
     return self
Exemple #10
0
 def _maybe_preserve_content(self, new_data, path, key_to_update, properties):
     """Annotate `new_data` from sheet properties and preserve yaml content.

     Dict data can receive the configured sheet properties under
     `META_KEY` and the draft flag under `DRAFT_KEY`; data of any other
     type skips both annotations. When the path is a yaml file and
     `self.config.preserve` is set, the data is then passed through
     `utils.format_existing_data` together with the existing file content.

     Args:
         new_data: Data downloaded for the sheet.
         path: Pod path the data will be written to.
         key_to_update: Optional key passed on to
             `utils.format_existing_data`.
         properties: Mapping of sheet properties; `tabColor` and the names
             in `self.config.include_properties` are read from it.

     Returns:
         The data to write: `new_data` (possibly annotated in place) or the
         result of `utils.format_existing_data`.
     """
     # Both annotations assign string keys, which only works on dicts;
     # list data would raise TypeError, so it skips them.
     can_annotate = isinstance(new_data, dict)
     # Includes meta properties from the Google Sheet.
     if can_annotate and self.config.include_properties:
         if META_KEY not in new_data:
             new_data[META_KEY] = {}
         if 'properties' not in new_data[META_KEY]:
             new_data[META_KEY]['properties'] = {}
         for name in self.config.include_properties:
             if name in properties:
                 new_data[META_KEY]['properties'][name] = properties[name]
     # Tabs colored red are marked draft.
     if (can_annotate and self.config.color_as_draft
             and properties.get('tabColor')):
         if properties['tabColor'] == {'red': 1}:
             new_data[DRAFT_KEY] = True
     if path.endswith(('.yaml', '.yml')) and self.config.preserve:
         # Use existing data if it exists. If we're updating data at a
         # specific key, and if the existing data doesn't exist, use an
         # empty dict. If the file doesn't exist and if we're not updating
         # at a specific key, just return the new data without reformatting.
         if self.pod.file_exists(path):
             # Do a text parse of the yaml file to prevent the constructors.
             content = self.pod.read_file(path)
             existing_data = utils.load_plain_yaml(content)
         elif key_to_update:
             existing_data = {}
         else:
             return new_data
         # Skip trying to update lists, because there would be no
         # expectation of merging old and new list data.
         if not key_to_update and not isinstance(new_data, dict):
             return new_data
         if isinstance(existing_data, dict):
             return utils.format_existing_data(
                 old_data=existing_data, new_data=new_data,
                 preserve=self.config.preserve, key_to_update=key_to_update)
     return new_data