Example #1
    def _import_subscriptions(self):
        subscriptions = None
        for f in self.z.namelist():
            if 'subscriptions.xml' in f:
                subscriptions = opml.from_string(self.z.open(f).read())
                break

        if subscriptions is None:
            return False

        for sub in subscriptions:
            if hasattr(sub, 'type'):
                title = sub.title
                link = sub.xmlUrl
                site = sub.htmlUrl
                Feed.create_and_subscribe(title, link, site, self.user)
            else:
                # No 'type' attribute: this outline is a folder of feeds.
                folder = sub
                for child in folder:
                    title = child.title
                    link = child.xmlUrl
                    site = child.htmlUrl
                    feed = Feed.create_and_subscribe(title, link, site, self.user)

                    userfeed = feed.userfeed(self.user)
                    userfeed.tags.add(folder.title)
        return True
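
The snippets in this collection all lean on the same small surface of the opml package: opml.from_string() parses OPML markup (bytes work fine) into an outline object whose head fields (such as title) are attributes, iterating it yields the top-level outline elements, len() gives an element's child count, and each element exposes its XML attributes (text, title, type, xmlUrl, htmlUrl) as Python attributes, raising AttributeError when one is absent (which is what the hasattr checks above rely on). A minimal, self-contained sketch with an illustrative inline document:

import opml

SAMPLE = b"""<?xml version="1.0" encoding="UTF-8"?>
<opml version="1.0">
  <head><title>Demo subscriptions</title></head>
  <body>
    <outline text="News" title="News">
      <outline type="rss" text="Example feed" title="Example feed"
               xmlUrl="http://example.com/feed.xml" htmlUrl="http://example.com/"/>
    </outline>
  </body>
</opml>"""

subscriptions = opml.from_string(SAMPLE)
print(subscriptions.title)          # head title: 'Demo subscriptions'
for node in subscriptions:          # top-level outline elements
    if len(node):                   # has children: a folder/category
        for feed in node:
            print(feed.text, feed.xmlUrl, feed.htmlUrl)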
Example #2
File: Adv.py Project: simudream/KindleEar
    def POST(self):
        import opml
        x = web.input(importfile={})
        memcache.set(MEMC_ADV_ID, self.__url__, 86400)
        if 'importfile' in x:
            user = self.getcurrentuser()
            try:
                rsslist = opml.from_string(x.importfile.file.read())
            except Exception as e:
                return self.GET(str(e))

            for o in self.walkOutline(rsslist):
                title, url, isfulltext = o.text, urllib.unquote_plus(
                    o.xmlUrl), o.isFulltext  # isFulltext is a non-standard attribute
                isfulltext = isfulltext.lower() in ('true', '1')
                if title and url:
                    rss = Feed.all().filter('book = ', user.ownfeeds).filter(
                        "url = ", url).get()  # check whether this feed already exists
                    if rss:
                        rss.title = title
                        rss.isfulltext = isfulltext
                        rss.put()
                    else:
                        Feed(title=title,
                             url=url,
                             book=user.ownfeeds,
                             isfulltext=isfulltext,
                             time=datetime.datetime.utcnow()).put()

            memcache.delete('%d.feedscount' % user.ownfeeds.key().id())
            raise web.seeother('/my')
        else:
            raise web.seeother('')
Example #3
def get_feeds(opml_text: bytes, selected_shows: Optional[list] = None) -> list:
    if selected_shows:
        print('These are the selected shows: {}'.format(', '.join(selected_shows)))
    opml_feeds = opml.from_string(opml_text)

    filtered_feeds = []

    if selected_shows:
        # https://stackoverflow.com/questions/59825/how-to-retrieve-an-element-from-a-set-without-removing-it#answer-59841
        first_obj = next(iter(selected_shows))
        if first_obj != '':
            opml_feeds = filter_shows(opml_feeds, selected_shows)

    for feed in opml_feeds:
        if feed.type == 'rss':
            filtered_feeds.append(
                {
                    'feed_name': feed.title,
                    'feed_url': feed.htmlUrl
                }
            )
        else:
            raise Exception("Your file had a non-RSS entry.")

    show_names = [show['feed_name'] for show in filtered_feeds]
    print('This is the filtered show list: {}'.format(', '.join(show_names)))
    return filtered_feeds
Example #4
File: srfm.py Project: zombified/slicerss
def opml_import(opml_data, idoffset, isdup):
    outline = opml.from_string(opml_data)
    feeds = []

    def handlefeed(sub, tags):
        if isdup(sub.xmlUrl.strip().lower(), tags):
            return []
        feeddata = dict(id=len(feeds) + idoffset,
                        tags=list(set(tags)),
                        title=sub.text,
                        xmlUrl=sub.xmlUrl,
                        htmlUrl=sub.htmlUrl if sub.htmlUrl else '')
        feeds.append(feeddata)

    def handlesub(sub, tags):
        if len(sub) < 1:  # just a feed
            handlefeed(sub, tags)
        else:  # a folder of feeds
            newtags = []
            newtags.extend(tags)
            try:
                newtags.append(sub.text)
            except AttributeError:
                # folder has no text attribute; keep the parent tags only
                pass
            for s in sub:
                handlesub(s, newtags)

    handlesub(outline, [])
    return feeds
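
Examples #1 and #4 use two different heuristics to tell a plain feed from a folder: #1 checks for the type attribute with hasattr(sub, 'type'), while #4 checks the child count with len(sub) < 1. A hedged sketch that combines the recursive walk with the most direct test, the presence of xmlUrl (the names here are illustrative, not from either project):

def walk_feeds(node, tags=()):
    # Recursively yield (tags, feed_node) pairs from an outline tree.
    # A node carrying xmlUrl is a feed; anything else is treated as a
    # folder whose text becomes a tag for everything beneath it.
    for child in node:
        if hasattr(child, 'xmlUrl'):
            yield tags, child
        else:
            yield from walk_feeds(child, tags + (child.text,))

# usage: for tags, feed in walk_feeds(opml.from_string(data)): ...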
Example #5
File: mblrdr.py Project: smonev/mblrdr
    def post(self):
        user = users.get_current_user()
        if not user:
            self.redirect('/')
            return

        from google.appengine.ext import blobstore
        import opml

        # get file
        upload_files = self.get_uploads('file')
        blob_info = upload_files[0]
        blob_reader = blobstore.BlobReader(blob_info.key())
        opmlFile = blob_reader.read()

        # get user
        ud = GetAppUserByEmail(user.email())
        private_data = json.loads(ud.private_data)
        self.bloglist = private_data['bloglist']

        # parse file
        outline = opml.from_string(opmlFile)
        self.processOutline(outline, 'root')

        # save new data
        private_data['bloglist'] = self.bloglist
        ud.private_data = json.dumps(private_data)
        ud.put()

        logging.debug('imported blog list: %s', json.dumps(self.bloglist))

        self.redirect('/')
Example #6
 def get_form_initial(self, step):
     if step == '1':
         src = None
         uploaddata = self.get_cleaned_data_for_step('0')
         if uploaddata['file']:
             fsrc = uploaddata['file']
             str = ""
             for chunk in fsrc.chunks():
                 str += chunk
             ofile = opml.from_string(str)
         else:
             src = uploaddata['url']
             ofile = opml.parse(src)
         initial = []
         for entry in ofile:
             init_entry = {
                 'enabled': True,
                 'title': entry.title,
                 'feedurl': entry.xmlUrl,
                 'wwwurl': entry.htmlUrl,
             }
             initial.append(init_entry)
         return initial
     else:
         return super(OPMLImport, self).get_form_initial(step)
Example #7
File: forms.py Project: bors-ltd/feedhq
    def to_python(self, data):
        f = super(OPMLField, self).to_python(data)
        if f is None:
            return

        if hasattr(data, "read"):
            content = data.read()
        else:
            content = data["content"]
        try:
            opml.from_string(content)
        except XMLSyntaxError:
            raise forms.ValidationError(_("This file doesn't seem to be a valid OPML file."))

        if hasattr(f, "seek") and callable(f.seek):
            f.seek(0)
        return f
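
The XMLSyntaxError caught here (and in Example #10) is not imported in the snippet; it comes from lxml.etree, which the opml package uses as its parser, so the validation pattern in isolation looks roughly like this:

import opml
from lxml.etree import XMLSyntaxError  # opml parses with lxml under the hood

def is_valid_opml(content):
    # Returns True when the payload parses as OPML, False on malformed XML.
    try:
        opml.from_string(content)
    except XMLSyntaxError:
        return False
    return True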
Example #8
File: data.py Project: samosky123/newspipe
def import_opml(nickname, opml_content):
    """
    Import new feeds from an OPML file.
    """
    user = User.query.filter(User.nickname == nickname).first()
    try:
        subscriptions = opml.from_string(opml_content)
    except Exception:
        logger.exception("Parsing OPML file failed:")
        raise

    def read(subsubscription, nb=0):
        """
        Parse recursively through the categories and sub-categories.
        """
        for subscription in subsubscription:
            if len(subscription) != 0:
                nb = read(subscription, nb)
            else:
                try:
                    title = subscription.text
                except AttributeError:
                    title = ""
                try:
                    description = subscription.description
                except AttributeError:
                    description = ""
                try:
                    link = subscription.xmlUrl
                except AttributeError:
                    continue
                if Feed.query.filter(
                    Feed.user_id == user.id, Feed.link == link
                ).first() is not None:
                    continue
                try:
                    site_link = subscription.htmlUrl
                except AttributeError:
                    site_link = ""
                new_feed = Feed(
                    title=title,
                    description=description,
                    link=link,
                    site_link=site_link,
                    enabled=True,
                )
                user.feeds.append(new_feed)
                nb += 1
        return nb

    nb = read(subscriptions)
    db.session.commit()
    return nb
Example #9
def import_opml(user_id, opml_url=None, data=None):
    outline = None
    if opml_url is not None:
        outline = opml.parse(opml_url)
    if data is not None:
        outline = opml.from_string(data)
    outline = outline or []
    for entry in outline:
        url = entry.xmlUrl
        print(url)
        subscribe_to_url(url, user_id)
Example #10
File: views.py Project: pombredanne/feedhq
    def post(self, request, *args, **kwargs):
        try:
            entries = opml.from_string(request.body)
        except XMLSyntaxError:
            raise exceptions.ParseError("This file doesn't seem to be a valid OPML file.")

        existing_feeds = set(request.user.feeds.values_list("url", flat=True))
        try:
            with user_lock("opml_import", request.user.pk, timeout=30):
                imported = save_outline(request.user, None, entries, existing_feeds)
        except ValidationError:
            raise exceptions.ParseError(
                "Another concurrent OPML import is happening for this user.")
        return Response("OK: {0}".format(imported))
Example #11
def add_file(f, openzip, cnt, feed_urls):
    if f.filename.startswith("__") or f.filename.startswith("."):
        return (0, feed_urls)
    if not f.filename.endswith("subscriptions.xml"):
        return (0, feed_urls)

    opmlfile = openzip.read(f)
    outline = opml.from_string(opmlfile)
    for o in outline:
        for o2 in o:
            feed_urls.append(o2.xmlUrl)
            cnt += 1
    return (cnt, feed_urls)
Example #12
 def __init__(self, source):
     self.source = source
     try:
         self.data = opml.from_string(source)
     except Exception as e:
         raise OpmlSourceError(e)
     try:
         self.number = self._show_number()
         # we need an explicit copy here so we get the unicode str
         self.title = self.data.title[:]
         self.shownotes = self._get_shownotes()
     except ValueError:
         raise ValueError('Bad opml data, no show number')
Example #13
File: user.py Project: v-khdumi/JARR
def opml_import():
    if request.files.get('opmlfile', None) is None:
        flash(gettext('Got no file'), 'warning')
        return redirect(url_for('user.profile'))

    data = request.files.get('opmlfile', None)
    try:
        subscriptions = opml.from_string(data.read())
    except Exception:
        flash(gettext("Couldn't parse file"), 'danger')
        return redirect(request.referrer)

    ccontr = CategoryController(current_user.id)
    fcontr = FeedController(current_user.id)
    created_count, existing_count, failed_count = 0, 0, 0
    categories = {cat.name: cat.id for cat in ccontr.read()}
    for line in subscriptions:
        try:
            link = line.xmlUrl
        except Exception:
            failed_count += 1
            continue

        # don't import twice
        if fcontr.read(link=link).count():
            existing_count += 1
            continue

        # handling categories
        cat_id = None
        category = getattr(line, 'category', None)
        if category:
            if category not in categories:
                new_category = ccontr.create(name=category)
                categories[new_category.name] = new_category.id
            cat_id = categories[category]

        fcontr.create(title=getattr(line, 'text', None),
                      category_id=cat_id,
                      description=getattr(line, 'description', None),
                      link=link,
                      site_link=getattr(line, 'htmlUrl', None))
        created_count += 1
    flash(
        gettext(
            "Created %(created)d feeds! (%(failed)d imports failed, "
            "%(existing)d already existed)",
            created=created_count,
            failed=failed_count,
            existing=existing_count), "info")
    return redirect(url_for('user.profile'))
Example #14
def parse_user_opml(user_id, source):
    source = source.replace('<?xml version="1.0" encoding="UTF-8"?>', '')
    try:
        res = opml.from_string(source)
    except Exception as exc:
        logger.error(exc)
        return False
    if res:
        for category in res:
            logger.debug('Category: %s', category.text)
            for item in category:
                logger.debug('Item: %s %s', item.text, item.xmlUrl)
                SourceFactory().add_to_user(user_id, 'feed', item.xmlUrl, item.text, category.text)
        return True
    return False
Example #15
    def post():
        opml_file = request.files['opml_file']

        try:
            subscriptions = opml.from_string(opml_file.read())
        except Exception as error:
            raise UnprocessableEntity("Couldn't parse OPML file (%r)" % error)

        ccontr = CategoryController(current_identity.id)
        fcontr = FeedController(current_identity.id)
        counts = {'created': 0, 'existing': 0, 'failed': 0, 'exceptions': []}
        categories = {cat.name: cat.id for cat in ccontr.read()}
        for line in subscriptions:
            try:
                link = line.xmlUrl
            except Exception as error:
                counts['failed'] += 1
                counts['exceptions'].append(str(error))
                continue

            # don't import twice
            if fcontr.read(link=link).count():
                counts['existing'] += 1
                continue

            # handling categories
            cat_id = None
            category = getattr(line, 'category', '').lstrip('/')
            if category:
                if category not in categories:
                    new_category = ccontr.create(name=category)
                    categories[new_category.name] = new_category.id
                cat_id = categories[category]

            fcontr.create(title=getattr(line, 'text', None),
                          category_id=cat_id,
                          description=getattr(line, 'description', None),
                          link=link,
                          site_link=getattr(line, 'htmlUrl', None))
            counts['created'] += 1
        code = 200
        if counts.get('created'):
            code = 201
        elif counts.get('failed'):
            code = 400
        return counts, code
Example #16
File: utils.py Project: bzero/JARR
def import_opml(email, opml_content):
    """
    Import new feeds from an OPML file.
    """
    user = User.query.filter(User.email == email).first()
    try:
        subscriptions = opml.from_string(opml_content)
    except Exception:
        logger.exception("Parsing OPML file failed:")
        raise

    def read(subsubscription, nb=0):
        """
        Parse recursively through the categories and sub-categories.
        """
        for subscription in subsubscription:
            if len(subscription) != 0:
                nb = read(subscription, nb)
            else:
                try:
                    title = subscription.text
                except AttributeError:
                    title = ""
                try:
                    description = subscription.description
                except AttributeError:
                    description = ""
                try:
                    link = subscription.xmlUrl
                except AttributeError:
                    continue
                if Feed.query.filter(Feed.user_id == user.id,
                                     Feed.link == link).first() is not None:
                    continue
                try:
                    site_link = subscription.htmlUrl
                except AttributeError:
                    site_link = ""
                new_feed = Feed(title=title, description=description,
                                link=link, site_link=site_link,
                                enabled=True)
                user.feeds.append(new_feed)
                nb += 1
        return nb
    nb = read(subscriptions)
    db.session.commit()
    return nb
Example #17
    def POST(self):
        import opml
        x = web.input(importfile={})
        defaultIsfulltext = bool(x.get('defaultIsfulltext'))  # whether to import feeds as full-text RSS by default
        if 'importfile' in x:
            user = self.getcurrentuser()
            try:
                rsslist = opml.from_string(x.importfile.file.read())
            except Exception as e:
                return self.GET(str(e))

            for o in self.walkOutline(rsslist):
                title, url, isfulltext = o.text, urllib.unquote_plus(
                    o.xmlUrl), o.isFulltext  # isFulltext is a non-standard attribute
                if isfulltext.lower() in ('true', '1'):
                    isfulltext = True
                elif isfulltext.lower() in ('false', '0'):
                    isfulltext = False
                else:
                    isfulltext = defaultIsfulltext

                if title and url:
                    try:
                        url = url.decode('utf-8')
                    except UnicodeError:
                        pass
                    rss = Feed.all().filter('book = ', user.ownfeeds).filter(
                        "url = ", url).get()  # check whether this feed already exists
                    if rss:
                        rss.title = title
                        rss.isfulltext = isfulltext
                        rss.put()
                    else:
                        Feed(title=title,
                             url=url,
                             book=user.ownfeeds,
                             isfulltext=isfulltext,
                             time=datetime.datetime.utcnow()).put()

            raise web.seeother('/my')
        else:
            raise web.seeother(self.__url__)
Example #18
import opml

with open('subscriptions.opml', 'rb') as f:
    subscriptions = opml.from_string(f.read())


def is_category(item):
    return not hasattr(item, 'type')


def list_feeds(title, clctn, indent=0):
    print((" " * indent) + title)
    for row in clctn:
        if is_category(row):
            list_feeds(row.title, row, indent + 1)
        else:
            print((" " * (indent + 1)) + row.title + " " + row.xmlUrl)


list_feeds(subscriptions.title, subscriptions)
Example #19
def import_opml(opml_string, user):
    outline = opml.from_string(opml_string)
    if len(outline) == 0:
        raise NoFeedsFound()
    for outline_element in outline:
        process_outline(outline_element, user)
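
process_outline is defined elsewhere in that project and is not shown. A hypothetical stand-in, just to make the snippet's shape concrete (subscribe_user_to_feed is an assumed helper, not from the source):

def process_outline(outline_element, user):
    # Hypothetical: subscribe to leaf feeds, recurse into folders.
    if hasattr(outline_element, 'xmlUrl'):
        subscribe_user_to_feed(user, outline_element.xmlUrl)  # assumed helper
    else:
        for child in outline_element:
            process_outline(child, user)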
Example #20
def parse(outline_url, my_folder, my_home_index_page):
    global DEBUG
    OPTIONS = {}
    TEMPLATES = {}
    PAGES = {}
    FILENAMES = []
    GLOSSARY_COMPLETE = False
    CALENDARS = {}
    GLOSSARY = DEFAULT_GLOSSARY.copy()
    data_folder, my_folder = os.path.split(my_folder)
    working_dir, DATA_FOLDER = os.path.split(data_folder)
    os.chdir(working_dir)
    base_folder = "%s/%s" % (DATA_FOLDER, my_folder)

    mkdir_p(base_folder)
    outline_url = re.sub('www', 'dl', outline_url)
    if 'usercontent' not in outline_url:
        outline_url = re.sub('dropbox', 'dropboxusercontent', outline_url)
    outline = list(opml.from_string(requests.get(outline_url).content))

    for i, node in enumerate(outline):
        if node.text == '#glossary':
            GLOSSARY.update(grabChildren(outline.pop(i)))
        elif node.text == '#templates':
            TEMPLATES.update(grabChildren(outline.pop(i)))
        else:
            first_word = node.text.split(' ')[0]
            if first_word in GLOSSARY_OPTIONS:
                try:
                    value = OPTIONS[first_word[1:]]
                except:
                    value = node.text
                GLOSSARY.update(GLOSSARY_FUNCTIONS[first_word](value))

    for k, v in GLOSSARY.items():
        if type(v[0]) == type([]):
            GLOSSARY[k] = ''.join(v[1])

    for k, v in TEMPLATES.items():
        if type(v[0]) == type([]):
            TEMPLATES[k] = (v[0], ''.join(v[1]))

    GLOSSARY_COMPLETE = True

    while outline:
        next_node = outline.pop()
        try:
            if next_node.type == 'include':
                real_url = re.sub('dropbox', 'dropboxusercontent',
                                  next_node.url)
                real_url = re.sub('https', 'http', real_url)
                include = list(opml.from_string(
                    requests.get(real_url).content))
                nodes = include
            else:
                nodes = [next_node]
        except:
            nodes = [next_node]

        for node in nodes:
            try:
                if node.icon == 'calendar':
                    try:
                        i_title = node.name
                    except:
                        i_title = node.text
                    CALENDARS = addCalendar('Home', node, i_title, CALENDARS)
                    continue
            except:
                pass
            try:
                if node[0].icon == 'calendar':
                    try:
                        i_title = node[0].name
                    except:
                        i_title = node.text
                    CALENDARS = addCalendar(node.text, node[0], i_title,
                                            CALENDARS)
                    continue
            except:
                pass
            if node.text[0] == '#':
                option = node.text[1:].strip()
                parts = option.split(' ')
                if len(parts) < 2:
                    OPTIONS[option] = True
                else:
                    key, value = parts[0], ' '.join(parts[1:])
                    if value[0] == '[':
                        value = random.choice(
                            [v.strip() for v in value[1:-1].split(',')])
                    elif value[0] == '"':
                        value = value[1:-1]
                    if value.lower() in ['true', 'false']:
                        # bool(value) is True even for 'false'; compare explicitly
                        OPTIONS[key] = (value.lower() == 'true')
                    else:
                        try:
                            OPTIONS[key] = int(value)
                        except:
                            OPTIONS[key] = value
            else:
                brandLink = '/'
                blogHomeTitle = OPTIONS.get('blogHomeTitle', 'Home')
                page = {}
                try:
                    this_type = node.type
                except:
                    this_type = 'outline'
                try:
                    rules, template = TEMPLATES[this_type]
                except Exception as e:
                    raise Usage(
                        "#templates node required until I pull default templates from Trex. \n\n%s"
                        % e.message)
                template = ''.join(template)
                for k, v in node._root.items():
                    template = re.sub('<%%%s%%>' % k, v, template)
                    page[k] = v
                page_desc = page.get('pageDescription', ' ')
                template = re.sub('<%blogHomeTitle%>', blogHomeTitle, template)
                template = re.sub('<%pageTitle%>', page['text'], template)
                template = re.sub('<%pageDescription%>', page_desc, template)
                if 'name' not in page:
                    page['name'] = makeName(page['text'])
                if 'url' not in page:
                    page['url'] = "/%s" % page['name']
                waste, bodytext = grabData(node, rules, this_type)
                bodytext.append(
                    '</div>'
                )  # not sure why we need this - something's not right
                bodytext = ''.join(bodytext)
                data = re.sub('<%bodytext%>', bodytext, template)
                page['bodytext'] = bodytext
                template = subData(data, GLOSSARY)
                template = re.sub(
                    '<%BRANDMENU%>',
                    '<a class="brand" href="<%BRANDLINK%>"><%BRAND%></a>',
                    template)
                template = re.sub('<%BRAND%>', blogHomeTitle, template)
                template = re.sub('<%BRANDLINK%>', brandLink, template)

                page['data'] = template
                PAGES[page['name']] = page

    for k, v in PAGES.items():
        file_name = getFileName("%s/%s" % (base_folder, v['name']), FILENAMES)
        FILENAMES.append(file_name)
        save_file = False
        new_data = v['data']

        if os.path.exists(file_name):
            fh = open(file_name)
            file_data = fh.read()[:-1]
            fh.close()
            save_file = (file_data != new_data)
        else:
            save_file = True
        if save_file:
            fh = open(file_name, "w+")
            print >> fh, new_data
            fh.close()
            if os.path.basename(file_name) == my_home_index_page:
                print file_name
                fh = open(
                    os.path.join(os.path.split(file_name)[0], "index.html"),
                    "w+")
                print >> fh, new_data
                fh.close()

    blogHomeTitle = OPTIONS.get('blogHomeTitle', 'Home')
    posts = {"Home": []}
    feedcount = OPTIONS.get('feedCount', 20)
    domain = "http://%s" % OPTIONS.get('domainName', '')
    feed_posts = []
    for base, calendar_stuff in CALENDARS.items():
        ycals, index_title = calendar_stuff
        page_data, path_name = None, None
        this_post_data, this_path_name = None, None
        while ycals:
            if this_post_data:
                file_name = getFileName(
                    "%s/%s" % (base_folder, this_path_name), FILENAMES)
                FILENAMES.append(file_name)
                save_file = False
                new_data = re.sub('<nextprev>',
                                  getPrevNextLinks(prev_path_name, path_name),
                                  this_post_data)
                if os.path.exists(file_name):
                    fh = open(file_name)
                    file_data = fh.read()[:-1]
                    fh.close()
                    save_file = (file_data != new_data)
                else:
                    save_file = True
                if save_file:
                    fh = open(file_name, "w+")
                    print >> fh, new_data
                    fh.close()
            ycal = ycals.pop()
            try:
                index_desc = ycal.description
            except:
                index_desc = ''
            year_title = ycal.text
            year_name = ycal.text
            year_num = ycal.text
            if base == 'Home':
                brandLink = '/'
                sub_folder = ''
                root_folder = base_folder
                year_path = year_num
            else:
                sub_folder = makeName(base)
                brandLink = "/%s" % sub_folder
                root_folder = "%s/%s" % (base_folder, sub_folder)
                if not os.path.exists(root_folder): os.mkdir(root_folder)
                year_path = "%s/%s" % (sub_folder, year_num)
                if (sub_folder, index_title) not in posts:
                    posts[(sub_folder, index_title)] = []
            year_folder = "%s/%s" % (base_folder, year_path)
            try:
                if not os.path.exists(year_folder): os.mkdir(year_folder)
            except:
                mkdir_p(year_folder)
            for mcal in ycal:
                month_title = mcal.text
                month_name = month_title.split(' ')[0]
                month_num = MONTHS[month_name]
                month_path = "%s/%s" % (year_path, month_num)
                month_folder = "%s/%s" % (year_folder, month_num)
                if not os.path.exists(month_folder): os.mkdir(month_folder)
                for dcal in mcal:
                    day_title = dcal.text
                    day_name = dcal.text
                    day_num = "%02d" % float(day_title.split(' ')[1])
                    day_path = "%s/%s" % (month_path, day_num)
                    day_folder = "%s/%s" % (month_folder, day_num)
                    if not os.path.exists(day_folder): os.mkdir(day_folder)
                    trail = [(year_path, year_title),
                             (month_path, month_title), (day_path, day_title)]
                    trail_links = """
                    <nextprev>
                    <div class="breadcrumbs"><a href="/%s">%s</a> / %s</div>
                    """ % (sub_folder, base, " / ".join(
                        ['<a href="/%s/">%s</a>' % (l, n) for l, n in trail]))

                    # using page below because it matches above
                    for node in dcal:
                        prev_post_data = this_post_data
                        prev_path_name = this_path_name
                        this_post_data = page_data
                        this_path_name = path_name
                        page = {}
                        try:
                            this_type = node.type
                        except:
                            this_type = 'outline'
                        rules, template = TEMPLATES[this_type]
                        template = ''.join(template)
                        for k, v in node._root.items():
                            template = re.sub('<%%%s%%>' % k, v, template)
                            page[k] = v
                        page_desc = page.get('pageDescription', index_desc)
                        template = re.sub('<%blogHomeTitle%>', blogHomeTitle,
                                          template)
                        template = re.sub('<%pageTitle%>', page['text'],
                                          template)
                        template = re.sub('<%pageDescription%>', page_desc,
                                          template)
                        if 'name' not in page:
                            page['name'] = makeName(page['text'])
                        if 'url' not in page:
                            page['url'] = "/%s" % page['name']
                        waste, bodytext = grabData(node, rules, this_type)
                        bodytext.append(
                            '</div><!--FIX-->'
                        )  # not sure why we need this - something's not right

                        bodytext = '\n'.join(bodytext)
                        data = re.sub('<%bodytext%>', bodytext, template)
                        page['bodytext'] = bodytext
                        template = re.sub('</h1>', '</h1>%s' % trail_links,
                                          subData(data, GLOSSARY))

                        template = re.sub(
                            '<%BRANDMENU%>',
                            '<a class="brand" href="<%BRANDLINK%>"><%BRAND%></a>',
                            template)
                        template = re.sub('<%BRAND%>', index_title, template)
                        template = re.sub('<%BRANDLINK%>', brandLink, template)

                        path_name = "%s/%s" % (day_path, page['name'])
                        page['url'] = "/%s" % path_name
                        listing = page['text'], page['bodytext'], page[
                            'url'], page_desc
                        if feedcount:
                            try:
                                if node.isFeedItem == 'true':
                                    feed_posts.append(
                                        PyRSS2Gen.RSSItem(
                                            title=page['text'],
                                            link=domain + page['url'],
                                            description=page['bodytext'],
                                            guid=domain + page['url'],
                                            pubDate=page['created']))
                                    feedcount -= 1
                            except:
                                pass

                        # Do this after listing so comments don't show on index pages
                        disqusGroupName = OPTIONS.get('disqusGroupName', False)
                        commentsString = ''
                        if disqusGroupName:
                            uniq_id = outline_url + node.created
                            commentsString = """
                            <script>var disqus_identifier = '%s';</script><a onclick="showHideComments ()"><span id="idShowHideComments" style="cursor: pointer;"></span></a><div class="divDisqusComments" id="idDisqusComments" style="visibility: visible;" ><div id="disqus_thread"></div></div><script type="text/javascript" src="http://disqus.com/forums/%s/embed.js"></script></div>
                            """ % (uniq_id, disqusGroupName)

                        page_data = re.sub('<!-- COMMENTS -->', commentsString,
                                           template)
                        page['data'] = page_data

                        if sub_folder:
                            posts[(sub_folder, index_title)].append(listing)
                        else:
                            posts["Home"].append(listing)
                        for path_info in [(year_path, year_title),
                                          (month_path, month_title),
                                          (day_path, day_title)]:
                            if not path_info[0]: continue
                            if path_info not in posts:
                                if sub_folder:
                                    path_info = path_info[0], path_info[1]
                                posts[path_info] = []
                            posts[path_info].append(listing)
                        if this_post_data:
                            file_name = getFileName(
                                "%s/%s" % (base_folder, this_path_name),
                                FILENAMES)
                            FILENAMES.append(file_name)
                            save_file = False
                            new_data = re.sub(
                                '<nextprev>',
                                getPrevNextLinks(prev_path_name, path_name),
                                this_post_data)
                            if os.path.exists(file_name):
                                fh = open(file_name)
                                file_data = fh.read()[:-1]
                                fh.close()
                                save_file = (file_data != new_data)
                            else:
                                save_file = True
                            if save_file:
                                fh = open(file_name, "w+")
                                print >> fh, new_data.encode('utf-16')
                                fh.close()

            if not ycals:
                prev_post_data = this_post_data
                prev_path_name = this_path_name
                this_post_data = page_data
                this_path_name = path_name
                path_name = None
                if this_post_data:
                    file_name = getFileName(
                        "%s/%s" % (base_folder, this_path_name), FILENAMES)
                    FILENAMES.append(file_name)
                    save_file = False
                    new_data = re.sub(
                        '<nextprev>',
                        getPrevNextLinks(prev_path_name, path_name),
                        this_post_data)
                    if os.path.exists(file_name):
                        fh = open(file_name)
                        file_data = fh.read()[:-1]
                        fh.close()
                        save_file = (file_data != new_data)
                    else:
                        save_file = True
                    if save_file:
                        fh = open(file_name, "w+")
                        print >> fh, new_data
                        fh.close()

    # Generate Feed
    date_format = "%a, %d %b %Y %H:%M:%S %Z"
    feed_posts.sort(
        key=lambda x: datetime.datetime.strptime(x.pubDate, date_format),
        reverse=True)
    buildFeed(OPTIONS['rssTitle'], domain, page_desc, feed_posts, base_folder)

    # iterate over posts
    for path_info, these_posts in posts.items():
        count = OPTIONS.get('bloghomeItemCount', 20)

        chunks = [
            these_posts[x:x + count]
            for x in xrange(0, len(these_posts), count)
        ]
        for i, chunk in enumerate(chunks):
            try:
                pageDescription = chunks[0][0][-1]
            except:
                pageDescription = OPTIONS.get('pageDescription', ' ')
            blogHomeDescription = OPTIONS.get('blogHomeDescription',
                                              pageDescription)
            if not i:
                page_name = "index"
            else:
                page_name = str(i + 1)
            if path_info == "Home":
                brandLink = '/'
                page_title = blogHomeTitle
                page_desc = blogHomeDescription
                file_name = getFileName(
                    "%s/%s.html" % (base_folder, page_name), FILENAMES)
                FILENAMES.append(file_name)
            else:
                path, page_title = path_info
                brandLink = "/%s" % path.split('/')[0]
                page_desc = pageDescription
                file_name = getFileName(
                    "%s/%s/%s.html" % (base_folder, path, page_name),
                    FILENAMES)
                FILENAMES.append(file_name)
            rules, template = TEMPLATES['bloghome']
            template = ''.join(template)

            # do the title and desc first to avoid overwriting with home title
            template = re.sub("<\%blogHomeTitle\%>", page_title, template)
            template = re.sub("<\%blogHomeDescription\%>", page_desc, template)
            template = re.sub("<\%pageTitle\%>", page_title, template)
            template = re.sub("<\%pageDescription\%>", page_desc, template)
            template = subData(template, GLOSSARY)

            template = re.sub(
                '<%BRANDMENU%>',
                '<a class="brand" href="<%BRANDLINK%>"><%BRAND%></a>',
                template)
            template = re.sub('<%BRAND%>', page_title, template)
            template = re.sub('<%BRANDLINK%>', brandLink, template)

            bodytext = ''
            for title, page_data, page_url, page_desc in chunk:
                bodytext += "<h2><a href=\"%s\">%s</a></h2>\n%s\n" % (
                    page_url, title, page_data)
            save_file = False
            new_data = re.sub('<%bodytext%>', bodytext, template)
            if os.path.exists(file_name):
                fh = open(file_name)
                file_data = fh.read()[:-1]
                fh.close()
                save_file = (file_data != new_data)
            else:
                save_file = True
            if save_file:
                fh = open(file_name, "w+")
                print >> fh, new_data.encode('utf-16')
                fh.close()

    return base_folder
Example #21
        connection.test.feed.find_and_modify({'url': self.url}, {'$push': {'items': {'$each': self.items}}})

    def process(self):
        """Common method for create od update feed in db"""
        if self.in_db:
            self.__update_feed_in_db()
        else:
            self.__add_feed_in_db()


if __name__ == "__main__":

    monkey.patch_all()

    logging.basicConfig(format='%(asctime)s::%(levelname)s::%(message)s', filename="fetcher.log", level=logging.INFO)
    logging.info("\n" + "*" * 5 + " New run " + "*" * 5)

    URLS = set()

    def create_job(url):
        f = FeedHandler(url)
        f.process()

    with open("feedly.opml", "rb") as opml_file:
        outline = opml.from_string(opml_file.read())
    for line in outline:
        for item in line:
            URLS.add(item.xmlUrl)

    gevent.joinall([gevent.spawn(create_job, url) for url in URLS])
예제 #30
0
def parse(outline_url, my_folder, my_home_index_page):
    global DEBUG
    OPTIONS = {}
    TEMPLATES = {}
    PAGES = {}
    FILENAMES = []
    GLOSSARY_COMPLETE = False
    CALENDARS = {}
    GLOSSARY = DEFAULT_GLOSSARY.copy()
    data_folder, my_folder = os.path.split(my_folder)
    working_dir, DATA_FOLDER = os.path.split(data_folder)
    os.chdir(working_dir)
    base_folder = "%s/%s" % (DATA_FOLDER, my_folder)

    mkdir_p(base_folder)
    outline_url = re.sub('www','dl',outline_url)
    if 'usercontent' not in outline_url:
        outline_url = re.sub('dropbox','dropboxusercontent', outline_url)
    outline = list(opml.from_string(requests.get(outline_url).content))


    for i, node in enumerate(outline):
        if node.text == '#glossary':
            GLOSSARY.update(grabChildren(outline.pop(i)))
        elif node.text == '#templates':
            TEMPLATES.update(grabChildren(outline.pop(i)))
        else:
            first_word = node.text.split(' ')[0]
            if first_word in GLOSSARY_OPTIONS:
                try:
                    value = OPTIONS[first_word[1:]]
                except:
                    value = node.text
                GLOSSARY.update(GLOSSARY_FUNCTIONS[first_word](value))

    for k,v in GLOSSARY.items():
        if type(v[0]) == type([]):
            GLOSSARY[k] = ''.join(v[1])

    for k,v in TEMPLATES.items():
        if type(v[0]) == type([]):
            TEMPLATES[k] = (v[0],''.join(v[1]))

    GLOSSARY_COMPLETE = True

    while outline:
        next_node = outline.pop()
        try:
            if next_node.type == 'include':
                real_url = re.sub('dropbox','dropboxusercontent',next_node.url)
                real_url = re.sub('https','http',real_url)
                include = list(opml.from_string(requests.get(real_url).content))
                nodes = include
            else:
                nodes = [next_node]
        except:
            nodes = [next_node]

        for node in nodes:
            try:
                if node.icon == 'calendar':
                    try:
                        i_title = node.name
                    except:
                        i_title = node.text
                    CALENDARS = addCalendar('Home',node,i_title, CALENDARS)
                    continue
            except:
                pass
            try:
                if node[0].icon == 'calendar':
                    try:
                        i_title = node[0].name
                    except:
                        i_title = node.text
                    CALENDARS = addCalendar(node.text,node[0],i_title, CALENDARS)
                    continue
            except:
                pass
            if node.text[0] == '#':
                option = node.text[1:].rstrip().lstrip()
                parts = option.split(' ')
                if len(parts) < 2:
                    OPTIONS[option] = True
                else:
                    key, value = parts[0], ' '.join(parts[1:])
                    if value[0] == '[':
                        value = random.choice([v.strip() for v in value[1:-1].split(',')])
                    elif value[0] == '"':
                        value = value[1:-1]
                    if value.lower() in ['true','false']:
                        OPTIONS[key] = bool(value)
                    else:
                        try:
                            OPTIONS[key] = int(value)
                        except:
                            OPTIONS[key] = value
            else:
                brandLink = '/'
                blogHomeTitle = OPTIONS.get('blogHomeTitle','Home')
                page = {}
                try:
                    this_type = node.type
                except:
                    this_type = 'outline'
                try:
                    rules, template = TEMPLATES[this_type]
                except Exception as e:
                    raise Usage("#templates node required until I pull default templates from Trex. \n\n%s" % e.message)
                template = ''.join(template)
                for k,v in node._root.items():
                    template = re.sub('<%%%s%%>' % k,v,template)
                    page[k] = v
                page_desc = page.get('pageDescription', ' ')
                template = re.sub('<%blogHomeTitle%>', blogHomeTitle, template)
                template = re.sub('<%pageTitle%>', page['text'], template)
                template = re.sub('<%pageDescription%>', page_desc, template)
                if 'name' not in page:
                    page['name'] = makeName(page['text'])
                if 'url' not in page:
                    page['url'] = "/%s" % page['name']
                waste, bodytext = grabData(node, rules, this_type)
                bodytext.append('</div>') # not sure why we need this - something's not right
                bodytext = ''.join(bodytext)
                data = re.sub('<%bodytext%>',bodytext,template)
                page['bodytext'] = bodytext
                template = subData(data, GLOSSARY)
                template = re.sub('<%BRANDMENU%>', '<a class="brand" href="<%BRANDLINK%>"><%BRAND%></a>', template)
                template = re.sub('<%BRAND%>', blogHomeTitle, template)
                template = re.sub('<%BRANDLINK%>', brandLink, template)

                page['data'] = template
                PAGES[page['name']] = page

    for k, v in PAGES.items():
        file_name = getFileName("%s/%s" % (base_folder,v['name']),FILENAMES)
        FILENAMES.append(file_name)
        save_file = False
        new_data = v['data']

        if os.path.exists(file_name):
            fh = open(file_name)
            file_data = fh.read()[:-1]
            fh.close()
            save_file = (file_data != new_data)
        else:
            save_file = True
        if save_file:
            fh = open(file_name, "w+")
            print >>fh, new_data
            fh.close()
            if os.path.basename(file_name) == my_home_index_page:
                print file_name
                fh = open(os.path.join(os.path.split(file_name)[0], "index.html"), "w+")
                print >>fh, new_data
                fh.close()

    blogHomeTitle = OPTIONS.get('blogHomeTitle','Home')
    posts = {"Home": []}
    feedcount = OPTIONS.get('feedCount',20)
    domain = "http://%s" % OPTIONS.get('domainName','')
    feed_posts = []
    for base, calendar_stuff in CALENDARS.items():
        ycals, index_title = calendar_stuff
        page_data, path_name = None, None
        this_post_data, this_path_name = None, None
        while ycals:
            if this_post_data:
                file_name = getFileName("%s/%s" % (base_folder, this_path_name), FILENAMES)
                FILENAMES.append(file_name)
                save_file = False
                new_data = re.sub('<nextprev>', getPrevNextLinks(prev_path_name, path_name), this_post_data)
                if os.path.exists(file_name):
                    fh = open(file_name)
                    file_data = fh.read()[:-1]
                    fh.close()
                    save_file = (file_data != new_data)
                else:
                    save_file = True
                if save_file:
                    fh = open(file_name, "w+")
                    print >>fh, new_data
                    fh.close()
            ycal = ycals.pop()
            try:
                index_desc = ycal.description
            except:
                index_desc = ''
            year_title = ycal.text
            year_name = ycal.text
            year_num = ycal.text
            if base == 'Home':
                brandLink = '/'
                sub_folder = ''
                root_folder = base_folder
                year_path = year_num
            else:
                sub_folder = makeName(base)
                brandLink = "/%s" % sub_folder
                root_folder = "%s/%s" % (base_folder,sub_folder)
                if not os.path.exists(root_folder): os.mkdir(root_folder)
                year_path = "%s/%s" % (sub_folder, year_num)
                if (sub_folder,index_title) not in posts: posts[(sub_folder,index_title)] = []
            year_folder = "%s/%s" % (base_folder, year_path)
            try:
                if not os.path.exists(year_folder): os.mkdir(year_folder)
            except:
                mkdir_p(year_folder)
            for mcal in ycal:
                month_title = mcal.text
                month_name = month_title.split(' ')[0]
                month_num = MONTHS[month_name]
                month_path = "%s/%s" % (year_path, month_num)
                month_folder = "%s/%s" % (year_folder, month_num)
                if not os.path.exists(month_folder): os.mkdir(month_folder)
                for dcal in mcal:
                    day_title = dcal.text
                    day_name = dcal.text
                    day_num = "%02d" % float(day_title.split(' ')[1])
                    day_path = "%s/%s" % (month_path, day_num)
                    day_folder = "%s/%s" % (month_folder, day_num)
                    if not os.path.exists(day_folder): os.mkdir(day_folder)
                    trail = [(year_path,year_title),(month_path,month_title),(day_path,day_title)]
                    trail_links = """
                    <nextprev>
                    <div class="breadcrumbs"><a href="/%s">%s</a> / %s</div>
                    """ % (sub_folder, base, " / ".join(['<a href="/%s/">%s</a>' % (l,n) for l,n in trail]))

                    # Build a per-node 'page' dict, named to match the page handling earlier in the script.
                    for node in dcal:
                        prev_post_data = this_post_data
                        prev_path_name = this_path_name
                        this_post_data = page_data
                        this_path_name = path_name
                        page = {}
                        try:
                            this_type = node.type
                        except AttributeError:
                            this_type = 'outline'
                        rules, template = TEMPLATES[this_type]
                        template = ''.join(template)
                        for k,v in node._root.items():
                            template = re.sub('<%%%s%%>' % k,v,template)
                            page[k] = v
                        page_desc = page.get('pageDescription', index_desc)
                        template = re.sub('<%blogHomeTitle%>', blogHomeTitle, template)
                        template = re.sub('<%pageTitle%>', page['text'], template)
                        template = re.sub('<%pageDescription%>', page_desc, template)
                        if 'name' not in page:
                            page['name'] = makeName(page['text'])
                        if 'url' not in page:
                            page['url'] = "/%s" % page['name']
                        waste, bodytext = grabData(node, rules, this_type)
                        bodytext.append('</div><!--FIX-->')  # workaround: closes a div the template leaves unbalanced; root cause unknown

                        bodytext = '\n'.join(bodytext)
                        data = re.sub('<%bodytext%>',bodytext,template)
                        page['bodytext'] = bodytext
                        template = re.sub('</h1>', '</h1>%s' % trail_links, subData(data, GLOSSARY))

                        template = re.sub('<%BRANDMENU%>', '<a class="brand" href="<%BRANDLINK%>"><%BRAND%></a>', template)
                        template = re.sub('<%BRAND%>', index_title, template)
                        template = re.sub('<%BRANDLINK%>', brandLink, template)

                        path_name = "%s/%s" % (day_path,page['name'])
                        page['url'] = "/%s" % path_name
                        listing = page['text'], page['bodytext'], page['url'], page_desc
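                        # Collect up to feedCount RSS items from nodes flagged isFeedItem='true'.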
                        if feedcount:
                            try:
                                if node.isFeedItem == 'true':
                                    feed_posts.append(
                                        PyRSS2Gen.RSSItem(
                                        title = page['text'],
                                        link = domain + page['url'],
                                        description = page['bodytext'],
                                        guid = domain + page['url'],
                                        pubDate = page['created']
                                        )
                                    )
                                    feedcount -= 1
                            except (AttributeError, KeyError):  # node lacks isFeedItem or page lacks 'created'
                                pass

                        # Do this after listing so comments don't show on index pages
                        disqusGroupName = OPTIONS.get('disqusGroupName', False)
                        commentsString = ''
                        if disqusGroupName:
                            uniq_id = outline_url + node.created
                            commentsString = """
                            <script>var disqus_identifier = '%s';</script><a onclick="showHideComments ()"><span id="idShowHideComments" style="cursor: pointer;"></span></a><div class="divDisqusComments" id="idDisqusComments" style="visibility: visible;" ><div id="disqus_thread"></div></div><script type="text/javascript" src="http://disqus.com/forums/%s/embed.js"></script></div>
                            """ % (uniq_id, disqusGroupName)

                        page_data = re.sub('<!-- COMMENTS -->', commentsString, template)
                        page['data'] = page_data

                        if sub_folder:
                            posts[(sub_folder,index_title)].append(listing)
                        else:
                            posts["Home"].append(listing)
                        for path_info in [(year_path, year_title), (month_path, month_title), (day_path, day_title)]:
                            if not path_info[0]: continue
                            if path_info not in posts:
                                posts[path_info] = []
                            posts[path_info].append(listing)
                        if this_post_data:
                            file_name = getFileName("%s/%s" % (base_folder, this_path_name), FILENAMES)
                            FILENAMES.append(file_name)
                            save_file = False
                            new_data = re.sub('<nextprev>', getPrevNextLinks(prev_path_name, path_name), this_post_data)
                            if os.path.exists(file_name):
                                fh = open(file_name)
                                file_data = fh.read()[:-1]
                                fh.close()
                                save_file = (file_data != new_data.encode('utf-16'))  # compare against the utf-16 bytes actually written below
                            else:
                                save_file = True
                            if save_file:
                                fh = open(file_name, "w+")
                                print >>fh, new_data.encode('utf-16')
                                fh.close()

            if not ycals:
                prev_post_data = this_post_data
                prev_path_name = this_path_name
                this_post_data = page_data
                this_path_name = path_name
                path_name = None
                if this_post_data:
                    file_name = getFileName("%s/%s" % (base_folder, this_path_name), FILENAMES)
                    FILENAMES.append(file_name)
                    save_file = False
                    new_data = re.sub('<nextprev>', getPrevNextLinks(prev_path_name, path_name), this_post_data)
                    if os.path.exists(file_name):
                        fh = open(file_name)
                        file_data = fh.read()[:-1]
                        fh.close()
                        save_file = (file_data != new_data)
                    else:
                        save_file = True
                    if save_file:
                        fh = open(file_name, "w+")
                        print >>fh, new_data
                        fh.close()

    # Generate Feed
    date_format = "%a, %d %b %Y %H:%M:%S %Z"
    feed_posts.sort(key=lambda x: datetime.datetime.strptime(x.pubDate, date_format), reverse=True)
    buildFeed(OPTIONS['rssTitle'], domain, page_desc, feed_posts, base_folder)

    # Paginate each index: split its posts into bloghomeItemCount-sized chunks and write index.html, 2.html, 3.html, ...
    for path_info, these_posts in posts.items():
        count = OPTIONS.get('bloghomeItemCount',20)

        chunks=[these_posts[x:x+count] for x in xrange(0, len(these_posts), count)]
        for i,chunk in enumerate(chunks):
            try:
                pageDescription = chunks[0][0][-1]
            except IndexError:
                pageDescription = OPTIONS.get('pageDescription',' ')
            blogHomeDescription = OPTIONS.get('blogHomeDescription', pageDescription)
            if not i:
                page_name = "index"
            else:
                page_name = str(i+1)
            if path_info == "Home":
                brandLink = '/'
                page_title = blogHomeTitle
                page_desc = blogHomeDescription
                file_name = getFileName("%s/%s.html" % (base_folder, page_name), FILENAMES)
                FILENAMES.append(file_name)
            else:
                path, page_title = path_info
                brandLink = "/%s" % path.split('/')[0]
                page_desc = pageDescription
                file_name = getFileName("%s/%s/%s.html" % (base_folder, path, page_name), FILENAMES)
                FILENAMES.append(file_name)
            rules, template = TEMPLATES['bloghome']
            template = ''.join(template)

            # do the title and desc first to avoid overwriting with home title
            template = re.sub("<\%blogHomeTitle\%>", page_title, template)
            template = re.sub("<\%blogHomeDescription\%>", page_desc, template)
            template = re.sub("<\%pageTitle\%>", page_title, template)
            template = re.sub("<\%pageDescription\%>", page_desc, template)
            template = subData(template, GLOSSARY)

            template = re.sub('<%BRANDMENU%>', '<a class="brand" href="<%BRANDLINK%>"><%BRAND%></a>', template)
            template = re.sub('<%BRAND%>', page_title, template)
            template = re.sub('<%BRANDLINK%>', brandLink, template)

            bodytext = ''
            for title, page_data, page_url, page_desc in chunk:
                bodytext += "<h2><a href=\"%s\">%s</a></h2>\n%s\n" % (page_url, title, page_data)
            save_file = False
            new_data = re.sub('<%bodytext%>', bodytext, template)
            if os.path.exists(file_name):
                fh = open(file_name)
                file_data = fh.read()[:-1]
                fh.close()
                save_file = (file_data != new_data.encode('utf-16'))  # match the utf-16 encoding used when writing
            else:
                save_file = True
            if save_file:
                fh = open(file_name, "w+")
                print >>fh, new_data.encode('utf-16')
                fh.close()

    return base_folder
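
# Note: the read-compare-write sequence above repeats verbatim several times.
# A small helper could factor it out. This is a sketch only -- save_if_changed
# is a hypothetical name, not part of the original source:
import os

def save_if_changed(file_name, new_data):
    """Write new_data (plus a trailing newline) only when the on-disk copy differs."""
    if os.path.exists(file_name):
        with open(file_name) as fh:
            if fh.read()[:-1] == new_data:
                return False  # content unchanged; skip the write
    with open(file_name, "w") as fh:
        fh.write(new_data + "\n")  # equivalent to: print >>fh, new_data
    return True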
Example #31
"""
$ pip install opml
$ python opml_to_markdown.py some_outline.opml
-> some_outline.md
"""

import codecs
import opml
import sys

INPUT = sys.argv[1]
OUTPUT = '.'.join(INPUT.split('.')[:-1] + ['md'])

with codecs.open(INPUT, 'r', 'utf-8') as f:
    outline = opml.from_string(f.read())

blocks = []


def _extractBlocks(node):
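    # Depth-first walk: record each child's text, then recurse into its children.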
    for child in node:
        blocks.append(child.text)
        if len(child) > 0:
            _extractBlocks(child)


_extractBlocks(outline)

output_content = '\n\n'.join(blocks)
with codecs.open(OUTPUT, 'w', 'utf-8') as f:
    f.write(output_content)
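
# The flat join above loses the outline's nesting. A variant -- a sketch only,
# using the same opml node API (iterable children, .text) as _extractBlocks --
# could keep depth as Markdown heading levels instead:
def _extractHeadings(node, depth=1, blocks=None):
    if blocks is None:
        blocks = []
    for child in node:
        # One '#' per nesting level, capped at h6.
        blocks.append('%s %s' % ('#' * min(depth, 6), child.text))
        if len(child) > 0:
            _extractHeadings(child, depth + 1, blocks)
    return blocks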