Exemple #1
0
    def execute(self, config):
        """Downloads Google Sheets data and writes it into the pod.

        Single sheet imports (``config.path``) write each requested gid to
        one file; collection imports (``config.collection``) write one yaml
        file per sheet tab.

        Args:
            config: Preprocessor config with spreadsheet ``id``, optional
                ``gid``/``gids``, ``format``, header row options, and either
                ``path`` or ``collection`` as the output target.
        """
        spreadsheet_id = config.id
        gids = config.gids or []
        if config.gid is not None:
            gids.append(config.gid)
        # Default to the first sheet (gid 0) when nothing was requested
        # and no collection import is configured.
        if not gids and not config.collection:
            gids.append(0)
        format_as = config.format
        # Collection imports need a keyed (map/grid) format; fall back to
        # 'map' when an incompatible format was configured.
        if (config.collection and
                format_as not in GoogleSheetsPreprocessor.MAP_TYPES and
                format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
            format_as = 'map'
        gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
            spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
            logger=self.pod.logger, generate_ids=config.generate_ids,
            header_row_count=config.header_row_count,
            header_row_index=config.header_row_index)

        if config.path:
            # Single sheet import.
            path, key_to_update = self.parse_path(config.path)

            for gid in gids:
                # The download may skip tabs; indexing an absent gid would
                # otherwise raise KeyError.
                if gid not in gid_to_data:
                    self.logger.info(
                        'Sheet not imported for gid {}. Skipped tab?'.format(gid))
                    continue
                # Preserve existing data if necessary.
                gid_to_data[gid] = self._maybe_preserve_content(
                        new_data=gid_to_data[gid],
                        path=path,
                        key_to_update=key_to_update)
                content = GoogleSheetsPreprocessor.serialize_content(
                    formatted_data=gid_to_data[gid], path=path,
                    output_style=self.config.output_style)

                self.pod.write_file(path, content)
                self.logger.info(
                    'Downloaded {} ({}) -> {}'.format(
                        gid_to_sheet[gid]['title'], gid, path))
        else:
            # Multi sheet import based on collection.
            collection_path = config.collection

            if not gids:
                gids = gid_to_sheet.keys()

            for gid in gids:
                # Guard against tabs the download skipped.
                if gid not in gid_to_data:
                    self.logger.info(
                        'Sheet not imported for gid {}. Skipped tab?'.format(gid))
                    continue
                if gid_to_sheet[gid]['title'].strip().startswith(IGNORE_INITIAL):
                    continue
                file_name = '{}.yaml'.format(
                    utils.slugify(gid_to_sheet[gid]['title']))
                output_path = os.path.join(collection_path, file_name)
                gid_to_data[gid] = self._maybe_preserve_content(
                        new_data=gid_to_data[gid],
                        path=output_path,
                        key_to_update=None)
                # Use plain text dumper to preserve yaml constructors.
                output_content = utils.dump_plain_yaml(gid_to_data[gid])
                self.pod.write_file(output_path, output_content)
                self.logger.info(
                    'Downloaded {} ({}) -> {}'.format(
                        gid_to_sheet[gid]['title'], gid, output_path))
Exemple #2
0
def get_storage(key, username):
    """Returns the Storage class compatible with the current environment."""
    slug = utils.slugify(key)
    storage_path = os.path.expanduser(
        '~/.config/grow/{}_{}'.format(slug, username))
    # Create the config directory on first use.
    parent_dir = os.path.dirname(storage_path)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    return oauth_file.Storage(storage_path)
Exemple #3
0
    def execute(self, config):
        """Downloads Google Sheets data and writes it into the pod.

        Args:
            config: Preprocessor config with spreadsheet ``id``, optional
                ``gid``/``gids``, ``format``, ``generate_ids``, and either
                ``path`` (single-sheet import) or ``collection``
                (multi-sheet import) as the output target.
        """
        spreadsheet_id = config.id
        gids = config.gids or []
        if config.gid is not None:
            gids.append(config.gid)
        # Default to the first sheet (gid 0) when nothing was requested
        # and no collection import is configured.
        if not gids and not config.collection:
            gids.append(0)
        format_as = config.format
        # Collection imports need a keyed (map/grid) format; fall back to
        # 'map' when an incompatible format was configured.
        if (config.collection and
                format_as not in GoogleSheetsPreprocessor.MAP_TYPES and
                format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
            format_as = 'map'
        gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
            spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
            logger=self.pod.logger, generate_ids=config.generate_ids)

        if config.path:
            # Single sheet import.
            path, key_to_update = self.parse_path(config.path)

            for gid in gids:
                # Preserve existing yaml data.
                if (path.endswith(('.yaml', '.yml'))
                        and self.config.preserve and self.pod.file_exists(path)):
                    existing_data = self.pod.read_yaml(path)
                    # Skip trying to update lists, because there would be no
                    # expectation of merging old and new list data.
                    if isinstance(existing_data, dict):
                        gid_to_data[gid] = utils.format_existing_data(
                            old_data=existing_data, new_data=gid_to_data[gid],
                            preserve=self.config.preserve, key_to_update=key_to_update)

                content = GoogleSheetsPreprocessor.serialize_content(
                    formatted_data=gid_to_data[gid], path=path,
                    output_style=self.config.output_style)

                self.pod.write_file(path, content)
                self.logger.info(
                    'Downloaded {} ({}) -> {}'.format(
                        gid_to_sheet[gid]['title'], gid, path))
        else:
            # Multi sheet import.
            collection_path = config.collection

            # No explicit gids: import every sheet in the spreadsheet.
            if not gids:
                gids = gid_to_sheet.keys()

            for gid in gids:
                # Tabs whose (stripped) title starts with IGNORE_INITIAL
                # are deliberately skipped.
                if gid_to_sheet[gid]['title'].strip().startswith(IGNORE_INITIAL):
                    continue
                file_name = '{}.yaml'.format(
                    utils.slugify(gid_to_sheet[gid]['title']))
                output_path = os.path.join(collection_path, file_name)
                self.pod.write_yaml(output_path, gid_to_data[gid])
                self.logger.info(
                    'Downloaded {} ({}) -> {}'.format(
                        gid_to_sheet[gid]['title'], gid, output_path))
Exemple #4
0
 def slug(self):
     """Returns the document slug, preferring an explicit ``$slug`` field."""
     explicit = self.fields.get('$slug')
     if explicit:
         return explicit
     if not self.title:
         return None
     # Select the slugify implementation based on the pod feature flag.
     slugifier = (utils.slugify
                  if self.pod.is_enabled(self.pod.FEATURE_OLD_SLUGIFY)
                  else slugify.slugify)
     return slugifier(self.title)
Exemple #5
0
def get_storage(key, username):
    """Returns the Storage class compatible with the current environment."""
    if appengine and utils.is_appengine():
        # On App Engine, credentials live in the datastore.
        return appengine.StorageByKeyName(
            appengine.CredentialsModel, username, 'credentials')
    slug = utils.slugify(key)
    storage_path = os.path.expanduser(
        '~/.config/grow/{}_{}'.format(slug, username))
    storage_dir = os.path.dirname(storage_path)
    if not os.path.exists(storage_dir):
        os.makedirs(storage_dir)
    return oauth_file.Storage(storage_path)
Exemple #6
0
def get_storage(key, username):
    """Returns the Storage class compatible with the current environment."""
    use_datastore = appengine and utils.is_appengine()
    if use_datastore:
        # App Engine: credentials are stored via the datastore model.
        return appengine.StorageByKeyName(
            appengine.CredentialsModel, username, 'credentials')
    # Local environment: file-backed storage under ~/.config/grow.
    file_name = os.path.expanduser('~/.config/grow/{}_{}'.format(
        utils.slugify(key), username))
    dir_name = os.path.dirname(file_name)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    return oauth_file.Storage(file_name)
Exemple #7
0
    def _parse_articles_rss(root, options):
        """Yields Article objects parsed from an RSS feed tree.

        Args:
            root: ElementTree root of the RSS document.
            options: Provides ``get_aliases(tag)`` mapping feed tags to
                additional article field names.

        Yields:
            Article instances with title, description, content, link,
            published date, a unique slug, and (when found) a lead image.
        """
        used_titles = set()

        for item in root.findall('./channel/item'):
            article = Article()

            for child in item:
                if child.tag == CONTENT_KEYS.title:
                    article.title = child.text
                elif child.tag == CONTENT_KEYS.description:
                    article.description = child.text
                    article.content = child.text
                elif child.tag == CONTENT_KEYS.link:
                    article.link = child.text
                elif child.tag == CONTENT_KEYS.published:
                    raw_date = child.text
                    article.published = parse(raw_date)
                elif child.tag == CONTENT_KEYS.content_encoded:
                    article.content = child.text
                elif child.text:
                    article.fields[child.tag] = child.text

                # Handle aliases, in addition to established defaults
                # Handled after defaults to allow for overrides
                for alias in options.get_aliases(child.tag):
                    article.fields[alias] = child.text

            if article.title:
                slug = utils.slugify(article.title)

                if slug in used_titles:
                    index = 1
                    alt_slug = slug
                    while alt_slug in used_titles:
                        alt_slug = '{}-{}'.format(slug, index)
                        index = index + 1
                    slug = alt_slug

                # Record the slug so later duplicate titles receive a '-N'
                # suffix; without this the dedup check above never fires.
                used_titles.add(slug)
                article.slug = slug

            if article.content:
                soup_article_content = BS(article.content, "html.parser")
                soup_article_image = soup_article_content.find('img')

                if soup_article_image:
                    article.image = soup_article_image['src']

            yield article
Exemple #8
0
    def _parse_articles_rss(root):
        """Yields Article objects parsed from an RSS feed tree.

        Args:
            root: ElementTree root of the RSS document.

        Yields:
            Article instances with utf8-encoded fields, a unique slug,
            and (when found) a lead image.
        """
        used_titles = set()

        for item in root.findall('./channel/item'):
            article = Article()

            for child in item:
                if child.tag == CONTENT_KEYS.title:
                    article.title = child.text.encode('utf8')
                elif child.tag == CONTENT_KEYS.description:
                    article.description = child.text.encode('utf8')
                    article.content = child.text.encode('utf8')
                elif child.tag == CONTENT_KEYS.link:
                    article.link = child.text.encode('utf8')
                elif child.tag == CONTENT_KEYS.published:
                    raw_date = child.text.encode('utf8')
                    article.published = parse(raw_date)
                elif child.tag == CONTENT_KEYS.content_encoded:
                    article.content = child.text.encode('utf8')
                elif child.text:
                    article.fields[child.tag] = child.text.encode('utf8')

            if article.title:
                slug = utils.slugify(article.title)

                if slug in used_titles:
                    index = 1
                    alt_slug = slug
                    while alt_slug in used_titles:
                        alt_slug = '{}-{}'.format(slug, index)
                        index = index + 1
                    slug = alt_slug

                # Record the slug so later duplicate titles receive a '-N'
                # suffix; without this the dedup check above never fires.
                used_titles.add(slug)
                article.slug = slug

            if article.content:
                soup_article_content = BS(article.content, "html.parser")
                soup_article_image = soup_article_content.find('img')

                if soup_article_image:
                    article.image = soup_article_image['src']

            yield article
Exemple #9
0
 def execute(self, config):
     """Downloads Google Docs into the pod.

     When ``config.folder`` is set, binds a Drive folder to a collection:
     each Google Doc in the folder is saved as ``<slug>.md`` and pod files
     no longer present in Drive are deleted. Otherwise downloads the
     single document ``config.id`` to ``config.path``.
     """
     convert = config.convert is not False
     # Binds a Google Drive folder to a collection.
     if config.folder:
         service = BaseGooglePreprocessor.create_service()
         query = "'{}' in parents".format(config.folder)
         # pylint: disable=no-member
         resp = service.files().list(q=query).execute()
         docs_to_add = []
         existing_docs = self.pod.list_dir(config.collection)
         for item in resp['items']:
             doc_id = item['id']
             title = item['title']
             # Only Google Docs are imported; skip sheets, images, etc.
             if item['mimeType'] != 'application/vnd.google-apps.document':
                 continue
             if title.startswith(IGNORE_INITIAL):
                 self.pod.logger.info('Skipping -> {}'.format(title))
                 continue
             # Slugify implementation depends on the pod feature flag.
             if self.pod.is_enabled(self.pod.FEATURE_OLD_SLUGIFY):
                 basename = '{}.md'.format(utils.slugify(title))
             else:
                 basename = '{}.md'.format(slugify.slugify(title))
             docs_to_add.append(basename)
             path = os.path.join(config.collection, basename)
             self._execute_doc(path, doc_id, convert)
         # Clean up files that are no longer in Google Drive.
         for path in existing_docs:
             doc_path = path.lstrip(os.path.sep)
             if doc_path.startswith(IGNORE_INITIAL):
                 continue
             if doc_path not in docs_to_add:
                 path_to_delete = os.path.join(config.collection, doc_path)
                 text = 'Deleting -> {}'.format(path_to_delete)
                 self.pod.logger.info(text)
                 self.pod.delete_file(path_to_delete)
         return
     # Downloads a single document.
     doc_id = config.id
     path = config.path
     self._execute_doc(path, doc_id, convert)
Exemple #10
0
 def execute(self, config):
     """Downloads Google Docs into the pod.

     When ``config.folder`` is set, binds a Drive folder to a collection:
     each Google Doc in the folder is saved as ``<slug>.md`` and pod files
     no longer present in Drive are deleted. Otherwise downloads the
     single document ``config.id`` to ``config.path``.
     """
     convert = config.convert is not False
     # Binds a Google Drive folder to a collection.
     if config.folder:
         service = BaseGooglePreprocessor.create_service()
         query = "'{}' in parents".format(config.folder)
         # pylint: disable=no-member
         resp = service.files().list(q=query).execute()
         docs_to_add = []
         existing_docs = self.pod.list_dir(config.collection)
         for item in resp['items']:
             doc_id = item['id']
             title = item['title']
             # Only Google Docs are imported; skip sheets, images, etc.
             if item['mimeType'] != 'application/vnd.google-apps.document':
                 continue
             if title.startswith(IGNORE_INITIAL):
                 self.pod.logger.info('Skipping -> {}'.format(title))
                 continue
             basename = '{}.md'.format(utils.slugify(title))
             docs_to_add.append(basename)
             path = os.path.join(config.collection, basename)
             self._execute_doc(path, doc_id, convert)
         # Clean up files that are no longer in Google Drive.
         for path in existing_docs:
             doc_path = path.lstrip(os.path.sep)
             if doc_path.startswith(IGNORE_INITIAL):
                 continue
             if doc_path not in docs_to_add:
                 path_to_delete = os.path.join(config.collection, doc_path)
                 text = 'Deleting -> {}'.format(path_to_delete)
                 self.pod.logger.info(text)
                 self.pod.delete_file(path_to_delete)
         return
     # Downloads a single document.
     doc_id = config.id
     path = config.path
     self._execute_doc(path, doc_id, convert)
Exemple #11
0
 def _slug_filter(value, delimiter=u'-'):
     """Slugifies ``value`` using the configured slugify implementation."""
     # Empty/None values pass through unchanged.
     if not value:
         return value
     if not use_legacy_slugify:
         return slugify.slugify(value, separator=delimiter)
     return utils.slugify(value, delimiter)
Exemple #12
0
    def execute(self, config):
        """Downloads Google Sheets data and writes it into the pod.

        Single sheet imports (``config.path``) write each requested gid to
        one file; collection imports (``config.collection``) write one yaml
        file per sheet tab and log a summary count.

        Args:
            config: Preprocessor config with spreadsheet ``id``, optional
                ``gid``/``gids``, ``format``, ``keep_empty_values``, header
                row options, and either ``path`` or ``collection`` as the
                output target.
        """
        spreadsheet_id = config.id
        gids = config.gids or []
        if config.gid is not None:
            gids.append(config.gid)
        # Default to the first sheet (gid 0) when nothing was requested
        # and no collection import is configured.
        if not gids and not config.collection:
            gids.append(0)
        format_as = config.format
        keep_empty_values = config.keep_empty_values
        # Collection imports need a keyed (map/grid) format; fall back to
        # 'map' when an incompatible format was configured.
        if (config.collection
                and format_as not in GoogleSheetsPreprocessor.MAP_TYPES
                and format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
            format_as = 'map'
        gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
            spreadsheet_id=spreadsheet_id,
            gids=gids,
            format_as=format_as,
            logger=self.pod.logger,
            generate_ids=config.generate_ids,
            header_row_count=config.header_row_count,
            header_row_index=config.header_row_index,
            keep_empty_values=keep_empty_values)

        if config.path:
            # Single sheet import.
            path, key_to_update = self.parse_path(config.path)

            for gid in gids:
                # The download may skip tabs; guard against absent gids.
                if gid not in gid_to_data:
                    self.logger.info(
                        'Sheet not imported for gid {}. Skipped tab?'.format(
                            gid))
                    continue
                # Merge with existing pod content when preservation applies.
                gid_to_data[gid] = self._maybe_preserve_content(
                    new_data=gid_to_data[gid],
                    path=path,
                    key_to_update=key_to_update,
                    properties=gid_to_sheet[gid])
                content = GoogleSheetsPreprocessor.serialize_content(
                    formatted_data=gid_to_data[gid],
                    path=path,
                    output_style=self.config.output_style)

                self.pod.write_file(path, content)
                self.logger.info('Downloaded {} ({}) -> {}'.format(
                    gid_to_sheet[gid]['title'], gid, path))
        else:
            # Multi sheet import based on collection.
            collection_path = config.collection

            # No explicit gids: import every sheet in the spreadsheet.
            if not gids:
                gids = list(gid_to_sheet.keys())

            num_saved = 0
            for gid in gids:
                # The download may skip tabs; guard against absent gids.
                if gid not in gid_to_data:
                    self.logger.info(
                        'Sheet not imported for gid {}. Skipped tab?'.format(
                            gid))
                    continue
                title = gid_to_sheet[gid]['title']
                # Tabs whose (stripped) title starts with IGNORE_INITIAL
                # are deliberately skipped.
                if title.strip().startswith(IGNORE_INITIAL):
                    continue
                # Slugify implementation depends on the pod feature flag.
                if self.pod.is_enabled(self.pod.FEATURE_OLD_SLUGIFY):
                    slug = utils.slugify(title)
                else:
                    slug = slugify.slugify(title)
                file_name = '{}.yaml'.format(slug)
                output_path = os.path.join(collection_path, file_name)
                gid_to_data[gid] = self._maybe_preserve_content(
                    new_data=gid_to_data[gid],
                    path=output_path,
                    key_to_update=None,
                    properties=gid_to_sheet[gid])
                # Use plain text dumper to preserve yaml constructors.
                output_content = utils.dump_plain_yaml(gid_to_data[gid])
                self.pod.write_file(output_path, output_content)
                if gid_to_data[gid].get(DRAFT_KEY):
                    self.logger.info('Drafted tab -> {}'.format(title))
                num_saved += 1
            text = 'Saved {} tabs -> {}'
            self.logger.info(text.format(num_saved, collection_path))
Exemple #13
0
 def slug(self):
   """Returns the explicit ``$slug`` field, or the slugified title."""
   try:
     # An explicit $slug always wins, even if it is falsy.
     return self.fields['$slug']
   except KeyError:
     pass
   if self.title is None:
     return None
   return utils.slugify(self.title)
Exemple #14
0
 def slug(self):
     """Returns the explicit ``$slug`` field, or the slugified title."""
     sentinel = object()
     # An explicit $slug always wins, even if it is falsy.
     explicit = self.fields.get('$slug', sentinel)
     if explicit is not sentinel:
         return explicit
     return None if self.title is None else utils.slugify(self.title)
Exemple #15
0
 def slug(self):
     """Returns a truthy ``$slug`` field, or the slugified title."""
     explicit = self.fields.get('$slug')
     if not explicit:
         # Fall back to the title; None titles yield no slug.
         return utils.slugify(self.title) if self.title is not None else None
     return explicit
Exemple #16
0
def slug_filter(value, delimiter=u'-'):
    """Filters string to remove url unfriendly characters."""
    slugified = utils.slugify(value, delimiter)
    return slugified
Exemple #17
0
 def slug(self):
     """Returns the explicit ``$slug`` field, or the slugified title."""
     fields = self.fields
     # An explicit $slug always wins, even if it is falsy.
     if "$slug" in fields:
         return fields["$slug"]
     if self.title is None:
         return None
     return utils.slugify(self.title)
Exemple #18
0
 def slug(self):
     """Returns a truthy ``$slug`` field, or the slugified title."""
     # Falsy $slug values fall through to the title-derived slug.
     return self.fields.get('$slug') or (
         utils.slugify(self.title) if self.title is not None else None)
Exemple #19
0
    def execute(self, config):
        """Downloads Google Sheets data and writes it into the pod.

        Single sheet imports (``config.path``) write each requested gid to
        one file; collection imports (``config.collection``) write one yaml
        file per sheet tab and log a summary count.

        Args:
            config: Preprocessor config with spreadsheet ``id``, optional
                ``gid``/``gids``, ``format``, header row options, and either
                ``path`` or ``collection`` as the output target.
        """
        spreadsheet_id = config.id
        gids = config.gids or []
        if config.gid is not None:
            gids.append(config.gid)
        # Default to the first sheet (gid 0) when nothing was requested
        # and no collection import is configured.
        if not gids and not config.collection:
            gids.append(0)
        format_as = config.format
        # Collection imports need a keyed (map/grid) format; fall back to
        # 'map' when an incompatible format was configured.
        if (config.collection and
                format_as not in GoogleSheetsPreprocessor.MAP_TYPES and
                format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
            format_as = 'map'
        gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
            spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
            logger=self.pod.logger, generate_ids=config.generate_ids,
            header_row_count=config.header_row_count,
            header_row_index=config.header_row_index)

        if config.path:
            # Single sheet import.
            path, key_to_update = self.parse_path(config.path)

            for gid in gids:
                # The download may skip tabs; guard against absent gids.
                if gid not in gid_to_data:
                    self.logger.info(
                        'Sheet not imported for gid {}. Skipped tab?'.format(gid))
                    continue
                # Merge with existing pod content when preservation applies.
                gid_to_data[gid] = self._maybe_preserve_content(
                    new_data=gid_to_data[gid],
                    path=path,
                    key_to_update=key_to_update,
                    properties=gid_to_sheet[gid])
                content = GoogleSheetsPreprocessor.serialize_content(
                    formatted_data=gid_to_data[gid], path=path,
                    output_style=self.config.output_style)

                self.pod.write_file(path, content)
                self.logger.info(
                    'Downloaded {} ({}) -> {}'.format(
                        gid_to_sheet[gid]['title'], gid, path))
        else:
            # Multi sheet import based on collection.
            collection_path = config.collection

            # No explicit gids: import every sheet in the spreadsheet.
            if not gids:
                gids = gid_to_sheet.keys()

            num_saved = 0
            for gid in gids:
                # The download may skip tabs; guard against absent gids.
                if gid not in gid_to_data:
                    self.logger.info(
                        'Sheet not imported for gid {}. Skipped tab?'.format(gid))
                    continue
                title = gid_to_sheet[gid]['title']
                # Tabs whose (stripped) title starts with IGNORE_INITIAL
                # are deliberately skipped.
                if title.strip().startswith(IGNORE_INITIAL):
                    continue
                slug = utils.slugify(title)
                file_name = '{}.yaml'.format(slug)
                output_path = os.path.join(collection_path, file_name)
                gid_to_data[gid] = self._maybe_preserve_content(
                    new_data=gid_to_data[gid],
                    path=output_path,
                    key_to_update=None,
                    properties=gid_to_sheet[gid])
                # Use plain text dumper to preserve yaml constructors.
                output_content = utils.dump_plain_yaml(gid_to_data[gid])
                self.pod.write_file(output_path, output_content)
                if gid_to_data[gid].get(DRAFT_KEY):
                    self.logger.info('Drafted tab -> {}'.format(title))
                num_saved += 1
            text = 'Saved {} tabs -> {}'
            self.logger.info(text.format(num_saved, collection_path))