Example #1
0
def import_feeds(request):
    """Show the OPML upload form and, on a valid POST, import its feeds.

    A successful import flashes a message and redirects to
    ``feeds:entries``. In every other case (GET, invalid form, or a
    ``ValidationError`` raised while importing under the user lock) the
    form page is rendered.
    """
    if request.method != 'POST':
        form = OPMLImportForm()
    else:
        form = OPMLImportForm(request.POST, request.FILES)
        if form.is_valid():
            # URLs of the feeds the user already has, passed to save_outline.
            known_urls = set(request.user.feeds.values_list('url', flat=True))
            outline = opml.parse(request.FILES['file'])
            try:
                with user_lock('opml_import', request.user.pk, timeout=30):
                    imported = save_outline(request.user, None, outline,
                                            known_urls)
            except ValidationError:
                logger.info("Prevented duplicate import for user {0}".format(
                    request.user.pk))
            else:
                count_text = ungettext(u'%s feed has been imported.',
                                       u'%s feeds have been imported.',
                                       imported) % imported
                refresh_hint = _('New content will appear in a moment when '
                                 'you refresh the page.')
                messages.success(request, " ".join([count_text, refresh_hint]))
                return redirect('feeds:entries')

    return render(request, 'feeds/import_feeds.html', {'form': form})
Example #2
0
def load_rss_feeds():
    """Make sure every feed in the ``BOT_OPML`` file has a ``slackbot_feeds`` row.

    Each outline entry is looked up by title, type, xmlurl and htmlurl; when
    no matching row exists a new one is inserted with ``borked`` set to
    ``"false"`` and committed. The connection obtained from ``parentobj`` is
    always handed back, even when a query fails.
    """
    outline = opml.parse(BOT_OPML)

    select_sql = ("SELECT id from slackbot_feeds WHERE title LIKE ? "
                  "and type LIKE ? and xmlurl LIKE ? and htmlurl LIKE ?")
    insert_sql = ("INSERT INTO slackbot_feeds "
                  "(title, type, xmlurl, htmlurl, borked) VALUES (?,?,?,?,?)")

    con = parentobj.getDBConnection()
    try:
        cur = con.cursor()
        # Iterate the outline itself: the previous range(0, len - 1) loop
        # never reached the last feed.
        for feed in outline:
            feed_row = (feed.title, feed.type, feed.xmlUrl, feed.htmlUrl)
            cur.execute(select_sql, feed_row)
            # fetchone() returns None when nothing matched; test that
            # explicitly instead of relying on a bare except.
            if cur.fetchone() is None:
                cur.execute(insert_sql, feed_row + ("false",))
                con.commit()
    finally:
        parentobj.closeDBConnection(con)
Example #3
0
 def get_form_initial(self, step):
     """Return the initial data for a wizard step.

     For step ``'1'`` the OPML source chosen in step ``'0'`` (an uploaded
     file, or otherwise a URL) is parsed and one dict per outline entry is
     returned with the keys ``enabled``, ``title``, ``feedurl`` and
     ``wwwurl``. Every other step is delegated to the parent class.
     """
     if step != '1':
         return super(OPMLImport, self).get_form_initial(step)

     uploaddata = self.get_cleaned_data_for_step('0')
     upload = uploaddata['file']
     if upload:
         # Join the chunks once as bytes: no shadowing of the builtin `str`,
         # no quadratic `+=`, and no text/bytes mixing on Python 3.
         ofile = opml.from_string(b"".join(upload.chunks()))
     else:
         ofile = opml.parse(uploaddata['url'])

     return [
         {
             'enabled': True,
             'title': entry.title,
             'feedurl': entry.xmlUrl,
             'wwwurl': entry.htmlUrl,
         }
         for entry in ofile
     ]
Example #4
0
def get_rss_feed(opml_file):
    """Return a list of ``{'rss_link': <xmlUrl>}`` dicts, one per OPML entry."""
    return [{'rss_link': entry.xmlUrl} for entry in opml.parse(opml_file)]
Example #5
0
def GetPodcastsFromOPML(request):
    """Search Spotify for every entry under the first OPML outline.

    Each entry's text is searched as a show in the ``BR`` market; the
    matches are collected per entry and the whole structure is returned
    as the JSON body of an ``HttpResponse``.
    """
    spotify = spotipy.Spotify(
        client_credentials_manager=SpotifyClientCredentials())
    outline = opml.parse(
        "file:///home/viniciusdof/Downloads/podcasts_opml.xml")

    groups = []
    response_model = OPMLPodcasts(outline.title, groups)
    for node in outline[0]:
        shows = []
        group = OPMLPodcastsPodcast(node.text, shows)
        found = spotify.search(node.text, type="show", market='BR')
        for show in found["shows"]["items"]:
            artwork = [
                Image(img["url"], img["height"], img["width"])
                for img in show["images"]
            ]
            shows.append(
                PodcastPodcast(show["name"], show["publisher"], show["id"],
                               show["description"], artwork))
        group.podcasts = shows
        groups.append(group)
    response_model.podcasts = groups
    return HttpResponse(response_model.toJSON())
Example #6
0
def getRSSSources():
    """Gets all the RSS sources we want to mine from.

    Reads the tech feed list shipped under ``articurate/sources/feed_lists``
    and returns ``{'rss': [RSSObj, ...]}``. Entries that cannot be turned
    into an ``RSSObj`` are skipped.
    """
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(os.getcwd())

    rss_sources = []

    # The list is loaded from a local copy; the remote equivalent is
    # http://www.techmeme.com/lb.opml
    techmeme_sources = opml.parse(
        'articurate/sources/feed_lists/tech_feed_list.opml')

    for item in techmeme_sources:
        try:
            if hasattr(item, 'htmlUrl'):
                rss_sources.append(RSSObj(item.text, item.xmlUrl,
                                          item.htmlUrl))
            else:
                rss_sources.append(RSSObj(item.text, item.xmlUrl))
        except Exception:
            # Best effort: a broken entry is skipped, but unlike a bare
            # except this lets KeyboardInterrupt / SystemExit through.
            continue

    return {'rss': rss_sources}
Example #7
0
def import_feeds(request):
    """Import feeds from an OPML source.

    On a valid POST the uploaded OPML file is parsed and its feeds are
    saved into the user's ``imported`` category (created on demand), then
    the user is redirected to ``feeds:home``. Otherwise the upload form is
    rendered.
    """
    if request.method == 'POST':
        form = OPMLImportForm(request.POST, request.FILES)
        if form.is_valid():
            # URLs of the feeds in any of the user's categories.
            existing_feeds = set(
                f.url for f in Feed.objects.filter(
                    category__in=request.user.categories.all(),
                )
            )
            # Fetch the category that receives imports, creating it if needed.
            category, created = request.user.categories.get_or_create(
                slug='imported', defaults={'name': _('Imported')},
            )

            entries = opml.parse(request.FILES['file'])
            imported = save_outline(request.user, category, entries,
                                    existing_feeds)

            # Translate the template first and interpolate afterwards;
            # interpolating inside _() produces a string that is never
            # found in the translation catalog.
            messages.success(
                request,
                _('%(num)s feeds have been imported') % {'num': imported},
            )
            return redirect('feeds:home')

    else:
        form = OPMLImportForm()

    context = {
        'form': form,
    }
    return render(request, 'feeds/import_feeds.html', context)
Example #8
0
def import_feeds(request):
    """Render the OPML import form, importing the upload on a valid POST.

    When the import succeeds a success message is queued and the user is
    redirected to ``feeds:entries``. If the locked import raises
    ``ValidationError`` it is logged and the form page is shown again.
    """
    is_post = request.method == 'POST'
    form = (OPMLImportForm(request.POST, request.FILES) if is_post
            else OPMLImportForm())

    if is_post and form.is_valid():
        # URLs of the feeds the user already has, passed to save_outline.
        current_urls = set(request.user.feeds.values_list('url', flat=True))
        parsed = opml.parse(request.FILES['file'])
        try:
            with user_lock('opml_import', request.user.pk, timeout=30):
                imported = save_outline(request.user, None, parsed,
                                        current_urls)
        except ValidationError:
            logger.info("prevented duplicate import", request=request)
        else:
            summary = ungettext(
                u'%s feed has been imported.',
                u'%s feeds have been imported.',
                imported) % imported
            hint = _('New content will appear in a moment when you refresh '
                     'the page.')
            messages.success(request, " ".join([summary, hint]))
            return redirect('feeds:entries')

    return render(request, 'feeds/import_feeds.html', {'form': form})
Example #9
0
def load_rss_feeds():
    """Make sure every feed in ``security.opml`` has a ``slackbot_feeds`` row.

    Each outline entry is looked up by title, type, xmlurl and htmlurl; when
    no matching row exists a new one is inserted with ``borked`` set to
    ``"false"`` and committed. The connection is closed even when a query
    fails.
    """
    outline = opml.parse("security.opml")

    select_sql = ("SELECT id from slackbot_feeds WHERE title LIKE %s "
                  "and type LIKE %s and xmlurl LIKE %s and htmlurl LIKE %s")
    insert_sql = ("INSERT INTO slackbot_feeds "
                  "(title, type, xmlurl, htmlurl, borked) "
                  "VALUES (%s,%s,%s,%s,%s)")

    # Keyword arguments replace the hand-built DSN, whose format string had
    # a single placeholder for two values and raised TypeError.
    con = psycopg2.connect(dbname=database_name, user=database_username)
    try:
        cur = con.cursor()
        # Iterate the outline itself: the previous range(0, len - 1) loop
        # never reached the last feed.
        for feed in outline:
            feed_row = (feed.title, feed.type, feed.xmlUrl, feed.htmlUrl)
            cur.execute(select_sql, feed_row)
            # fetchone() returns None when nothing matched; test that
            # explicitly instead of relying on a bare except.
            if cur.fetchone() is None:
                cur.execute(insert_sql, feed_row + ("false",))
                con.commit()
    finally:
        con.close()
Example #10
0
 def read(self):
     """
     Parse the OPML file named by ``self.filename`` and append the
     ``xmlUrl`` of every entry to ``self.list``.
     """
     outline = opml.parse(self.filename)
     self.list.extend(entry.xmlUrl for entry in outline)
Example #11
0
def fill_out_file(inputfile, outputfile):
    """Append one ``[feeds.<title>]`` section per OPML subscription.

    The entries under the first outline of ``inputfile`` are read; each
    title is reduced to ASCII letters and digits and each feed URL is
    rewritten to a channel URL. Sections are appended to ``outputfile``
    ordered case-insensitively by title.
    """
    subscriptions = opml.parse(inputfile)[0]

    url_by_title = {}
    for entry in subscriptions:
        # Strip everything that is not an ASCII letter or digit.
        key = re.sub(r'[^A-Za-z0-9]+', '', entry.text)
        # Swap the video-feed path for the channel path.
        url_by_title[key] = entry.xmlUrl.replace(
            'feeds/videos.xml?channel_id=', 'channel/')

    with open(outputfile, 'a') as f_out:
        ordered_titles = sorted(url_by_title,
                                key=lambda name: name.lower().split())
        for title in ordered_titles:
            f_out.write(f'''

      [feeds.{title}]
      url = "{url_by_title[title]}"
      page_size = 3 # The number of episodes to query each update (keep in mind, that this might drain API token)
      update_period = "12h" # How often query for updates, examples: "60m", "4h", "2h45m"
      quality = "high" # or "low"
      format = "video" # or "audio"''')
Example #12
0
def convert_opml_to_rest(opmlPath, restFile, **kwargs):
    'Write an OPML outline to restFile as reST, headed by its title.'
    outline = opml.parse(opmlPath)
    heading = outline.title
    rule = '=' * len(heading)
    # Title between two '=' rules, followed by an extra blank line.
    print >>restFile, rule
    print >>restFile, heading
    print >>restFile, rule + '\n'
    write_opml_to_rest(outline, restFile, **kwargs)
Example #13
0
def dl(opml_filename, database, outtmpl,
       fake=False, quiet=False, verbose=False):
    """Record and download new videos from the channels of an OPML file.

    Every feed item of every channel under the first outline is stored as
    a ``Video`` row. Items whose commit raises ``IntegrityError`` are
    skipped; the others are logged and, unless ``fake`` is set, passed to
    ``download`` together with the truthy options among ``outtmpl``,
    ``quiet`` and ``verbose``.
    """
    engine = create_engine(database)
    Base.metadata.create_all(engine)
    Base.metadata.bind = engine
    session = sessionmaker(bind=engine)()

    # Only options with a truthy value are forwarded.
    candidates = (('outtmpl', outtmpl), ('quiet', quiet),
                  ('verbose', verbose))
    opts = {name: value for name, value in candidates if value}

    channels = opml.parse(opml_filename)[0]
    for channel in channels:
        logger.debug('Parsing channel "{}".'.format(channel.title))
        for item in feedparser.parse(channel.xmlUrl)['items']:
            session.add(Video(url=item['link']))
            try:
                session.commit()
            except IntegrityError:
                # Presumably the URL is already stored; undo and move on.
                session.rollback()
                continue

            logger.info('Downloading "{}" from "{}".'.format(
                item['title'], channel.title))
            if fake:
                continue
            try:
                download(item['link'], opts)
            except Exception:
                session.rollback()
Example #14
0
def import_feeds(request):
    """Import feeds from an OPML source.

    On a valid POST the uploaded OPML file is parsed and its feeds are
    saved into the user's ``imported`` category (created on demand), then
    the user is redirected to ``feeds:home``. Otherwise the upload form is
    rendered.
    """
    if request.method == 'POST':
        form = OPMLImportForm(request.POST, request.FILES)
        if form.is_valid():
            # URLs of the feeds in any of the user's categories.
            existing_feeds = set(
                f.url for f in Feed.objects.filter(
                    category__in=request.user.categories.all(), )
            )
            # Fetch the category that receives imports, creating it if needed.
            category, created = request.user.categories.get_or_create(
                slug='imported',
                defaults={'name': _('Imported')},
            )

            entries = opml.parse(request.FILES['file'])
            imported = save_outline(request.user, category, entries,
                                    existing_feeds)

            # Translate the template first and interpolate afterwards;
            # interpolating inside _() produces a string that is never
            # found in the translation catalog.
            messages.success(
                request,
                _('%(num)s feeds have been imported') % {'num': imported},
            )
            return redirect('feeds:home')

    else:
        form = OPMLImportForm()

    context = {
        'form': form,
    }
    return render(request, 'feeds/import_feeds.html', context)
Example #15
0
    def start_to_subscribe(self):
        """Search each channel from ``subscription_manager`` and subscribe.

        For every entry under the first outline the channel title is typed
        into the search box, the search is submitted, and the subscribe
        button is clicked when its label reads ``SUBSCRIBE``.
        """
        find = self.driver.find_element_by_xpath

        for entry in opml.parse('subscription_manager')[0]:
            channel = entry.title

            # Select-all followed by delete empties the search box.
            search_box = find('//*[@id="search"]')
            search_box.send_keys(Keys.CONTROL + "a")
            search_box.send_keys(Keys.DELETE)
            search_box.send_keys(channel)

            find('//*[@id="search-icon-legacy"]').click()
            sleep(1)

            label = find(
                '//*[@id="subscribe-button"]/ytd-subscribe-button-renderer/paper-button/yt-formatted-string'
            ).text
            print(label)

            if label != 'SUBSCRIBE':
                continue

            find(
                '//*[@id="subscribe-button"]/ytd-subscribe-button-renderer/paper-button'
            ).click()
            print(f"Processed {channel}")
            sleep(1)
Example #16
0
def fetch_opml(url):
    """Parse the OPML at ``url`` and schedule a feed update with its URLs.

    Returns an empty list when the OPML cannot be opened; otherwise the
    update is queued through ``GObject.idle_add`` and nothing is returned.
    """
    try:
        outline = opml.parse(url)
    except IOError:
        print_d("Failed opening OPML %s" % url)
        return []

    def push_feed_urls():
        # Runs when the idle callback fires; the URLs are collected only
        # then, and update_feeds' result is handed back to the caller.
        return update_feeds([node.xmlUrl for node in outline])

    GObject.idle_add(push_feed_urls)
Example #17
0
def fetch_opml(url):
    """Load an OPML document and queue its feed URLs for updating.

    An ``IOError`` while opening the document is reported via ``print_d``
    and yields ``[]``. On success the work is deferred with
    ``GObject.idle_add`` and the function returns ``None``.
    """
    try:
        parsed = opml.parse(url)
    except IOError:
        print_d("Failed opening OPML %s" % url)
        return []

    def deliver():
        # The xmlUrl list is built when the callback runs, not before.
        return update_feeds([entry.xmlUrl for entry in parsed])

    GObject.idle_add(deliver)
Example #18
0
 def handle(self, *args, **options):
     """Subscribe users to every feed listed in an OPML file.

     ``options['subscriptions'][0]`` is the OPML file to import.
     ``options['user_ids']`` limits the import to those users; when it is
     empty all users are processed.

     Raises ``CommandError`` when no file is given, when the file cannot
     be parsed, or when a user cannot be loaded.
     """
     if not options['subscriptions']:
         raise CommandError('OPML file to import subscription is needed')
     try:
         outline = opml.parse(options['subscriptions'][0])
     except Exception:
         raise CommandError('Not a valid OPML file')

     user_list = (options['user_ids']
                  or User.objects.values_list('id', flat=True))
     for user_id in user_list:
         self.stdout.write('Importing subscriptions for user %s' % user_id)
         try:
             user = User.objects.get(pk=user_id)
         except (User.DoesNotExist, ValueError):
             # ValueError covers ids that cannot be used as a primary key.
             raise CommandError('User "%s" does not exist' % user_id)
         for category in outline:
             category_title = category.title if category else "?"
             self.stdout.write('\tCategory: %s' % category_title)
             for entry in category:
                 # Resolve the title once, before it is used: the former
                 # hasattr() check ran only after entry.title had already
                 # been dereferenced.
                 title = getattr(entry, 'title', None)
                 shown_title = '?' if title is None else title
                 self.stdout.write('\t\tSubscribing to: %s' % shown_title)
                 if title is not None and _is_valid_rss_feed(entry.xmlUrl):
                     _create_subscription_and_fetch_articles(user, entry.xmlUrl, title)
                     self.stdout.write(self.style.SUCCESS('\t\tSuccessfully suscribed user %s to %s' % (user_id, title)))
                 else:
                     self.stdout.write(self.style.ERROR('\t\tCould not subscribe user %s to %s: not a valid rss feed' % (user_id, shown_title)))
Example #19
0
def load(opml_resource):
    """
    Import the folders and feeds of an OPML file.

    Resources whose name does not end in ``.opml`` are rejected with a
    console message. For every feed, its folder is fetched (or created)
    by title and the feed itself is created unless one with the same
    title already exists.
    """
    if not opml_resource.endswith('.opml'):
        console.print(f"File {opml_resource} is not an OPML file",
                      style="bold red")
        return

    for folder in opml.parse(opml_resource):
        for feed in folder:
            label = f"{folder.text}, {feed.text}"

            # Look the folder up by title, creating it on first use.
            try:
                target = Folders.objects.get(title=folder.text)
            except Folders.DoesNotExist:
                target = Folders.objects.create(title=folder.text)

            # Create the feed only when no feed has this title yet.
            obj, was_created = Feeds.objects.get_or_create(
                title=feed.text,
                defaults={
                    'title': feed.text,
                    'url': feed.xmlUrl,
                    'folder': target
                },
            )
            if was_created:
                console.print(label + ' created', style="blue")
            else:
                console.print(label + ' already created', style="yellow")

    console.print('Nyuseu - 뉴스 - Feeds Loaded', style="green")
def read_opml(target_file):
    '''Build one dict per topic found under the first outline of an OPML file.

    Each dict holds the topic ``name``, one key per recognised sub-section
    (filled by ``get_topic_subdata``), an ``index`` listing the sections
    found in title case, and a 1-based ``priority`` following file order.
    '''
    topics = opml.parse(target_file)[0]

    known_sections = [
        "category", "competency", "reason", "description", "summary",
        "rating", "status", "plan", "dependencies", "related", "tags",
        "resources", "links"
    ]

    parsed_topics = []
    for priority, topic in enumerate(topics, 1):
        record = {"name": topic.text}
        sections_found = []
        for child in topic:
            for section in known_sections:
                # Section headings are matched case-insensitively.
                if child.text.lower() != section:
                    continue
                record[section] = get_topic_subdata(child, section)
                sections_found.append(section.title())

        # Generated fields are added after the parsed sections.
        record["index"] = sections_found
        record["priority"] = priority
        parsed_topics.append(record)

    return parsed_topics
Example #21
0
def main():
    """Turn ``Subscriptions.opml`` into markdown via ``write_markdown``.

    NOTE(review): ``filter`` is called with a single argument, so it is
    presumably a project function shadowing the builtin - confirm.
    """
    with open("Subscriptions.opml", "r") as f:
        feeds = [
            {"title": feed.text, "htmlUrl": feed.htmlUrl}
            for feed in filter(opml.parse(f))
        ]
        write_markdown(feeds)
Example #22
0
 def _parse_file(self, file_name):
     """
     Parse an OPML file and convert its outline with
     ``get_feeds_from_outline``.
     :param file_name: the file to parse
     :return: whatever ``get_feeds_from_outline`` builds from the outline
     """
     return self.get_feeds_from_outline(opml.parse(file_name))
Example #23
0
def main():
	"""Turn ``Subscriptions.opml`` into markdown via ``write_markdown``.

	NOTE(review): ``filter`` is called with a single argument, so it is
	presumably a project function shadowing the builtin - confirm.
	"""
	with open("Subscriptions.opml", "r") as f:
		feeds = [
			{"title": entry.text, "htmlUrl": entry.htmlUrl}
			for entry in filter(opml.parse(f))
		]
		write_markdown(feeds)
Example #24
0
def opml_import(request, url):
    """Parse the OPML document at ``url`` and import its outline.

    Returns an ``HttpResponse`` with body ``OK`` on success, or an error
    text when the document cannot be parsed.
    """
    import opml
    from opml_import import import_outline_element
    try:
        o = opml.parse(url)
    except Exception:
        # Any parse or fetch failure is reported to the client, without
        # also trapping KeyboardInterrupt / SystemExit as a bare except did.
        # NOTE(review): `url` is echoed back unescaped - confirm that is
        # acceptable for this endpoint.
        return HttpResponse('Cannot parse opml file %s' % url)
    import_outline_element(o)
    return HttpResponse('OK')
Example #25
0
	def __init__(self, f):
		"""Read the OPML file at path ``f`` into ``self.feeds``.

		One ``Feed`` is built per top-level entry from its text, title
		and xmlUrl.
		"""
		with open(f, "r") as opmlfile:
			parsed = [
				Feed(node.text, node.title, node.xmlUrl)
				for node in opml.parse(opmlfile)
			]

		self.feeds = parsed
Example #26
0
 def __init__(self, subscription_manager_file):
     """Parse the subscription export and collect its channels.

     Prints a hint and exits with status 1 when the file cannot be
     parsed.
     """
     self.file = subscription_manager_file
     try:
         self.parsed = opml.parse(self.file)
     except Exception:
         # Narrower than a bare except, so Ctrl-C and SystemExit are not
         # misreported as a missing file.
         print("Please download \"subscription_manager\" from youtube and place in current directory")
         sys.exit(1)
     self.channels = self.get_channels()
     self.channels_cached_list = []
Example #27
0
def opml_url_parser():
    """Return a ``{title: xmlUrl}`` dict for the configured OPML file.

    Feeds are read from the second outline level, i.e. the children of
    each top-level entry. Later duplicates of a title overwrite earlier
    ones.
    """
    outline = opml.parse(config.RSSOPT_CONFIG['OPML_LOCATION'])
    return {feed.title: feed.xmlUrl for group in outline for feed in group}
Example #28
0
 def handle(self, *args, **options):
     """Follow the redirect of every archived show not yet stored.

     Shows are read from the third outline level of ``na-archive.opml``.
     A show is handled when ``show_number`` yields a number, no ``Show``
     with that id exists, and the entry has a ``type`` attribute.
     """
     shows = []
     for year in opml.parse('na-archive.opml'):
         for month in year:
             shows.extend(month)

     for show in shows:
         number = show_number(show.text)
         if not number:
             continue
         if Show.objects.filter(id=number):
             continue
         if hasattr(show, 'type'):
             handle_redirect(show.url)
Example #29
0
 def load_new_feeds(self):
     """Add every feed from ``subscriptions.xml`` that the database lacks.

     Feeds are matched on ``xmlUrl``; each new feed is committed
     individually.
     """
     outline = opml.parse(
         os.path.join(self.config.sub_dir, 'subscriptions.xml'))
     for entry in outline:
         matches = self.dal.session.query(Feed).filter(
             Feed.xmlUrl == entry.xmlUrl).all()
         if matches:
             continue
         self.dal.session.add(Feed(entry.text, entry.xmlUrl))
         self.dal.session.commit()
Example #30
0
def parse_opml_file():
    """Import the uploaded OPML file for the logged-in user.

    Returns a JSON string whose ``message`` tells whether the import
    succeeded.
    """
    upload = request.files['file']
    logged_in_user_id = g.user['user_email']

    from feeds_helper import import_opml_file
    # The upload is handed over unparsed. Parsing it here first - as was
    # done before, with the result unused - consumes the stream that
    # import_opml_file reads, and raised outside the error handling for
    # invalid files.
    try:
        import_opml_file(logged_in_user_id, upload, mongo_lib)
    except Exception:
        return json.dumps({"message":"Could not import OPML file"})
    return json.dumps({"message":"OPML imported successfully"})
Example #31
0
def import_opml(user_id, opml_url=None, data=None):
    """Subscribe ``user_id`` to every feed URL in an OPML document.

    The document is taken from ``data`` (an OPML string) when given,
    otherwise from ``opml_url``. With neither argument, or with an empty
    outline, nothing is subscribed.
    """
    outline = None
    if opml_url is not None:
        outline = opml.parse(opml_url)
    if data is not None:
        # A string payload takes precedence over the URL.
        outline = opml.from_string(data)
    outline = outline or []
    for entry in outline:
        url = entry.xmlUrl
        # Call form of print: same output on Python 2, valid on Python 3.
        print(url)
        subscribe_to_url(url, user_id)
Example #32
0
 def handle(self, *args, **options):
     """Process archived shows that are not in the database yet.

     ``na-archive.opml`` is walked three levels deep to collect the
     shows. Each show with a number from ``show_number``, without a
     stored ``Show`` of that id, and with a ``type`` attribute has its
     URL passed to ``handle_redirect``.
     """
     archive = opml.parse('na-archive.opml')
     shows = []
     for year in archive:
         for month in year:
             for show in month:
                 shows.append(show)

     for show in shows:
         number = show_number(show.text)
         already_stored = number and Show.objects.filter(id=number)
         if not number or already_stored:
             continue
         if not hasattr(show, 'type'):
             continue
         handle_redirect(show.url)
Example #33
0
def import_opml(user_id, opml_url=None, data=None):
    """Subscribe ``user_id`` to each feed URL found in an OPML document.

    ``data`` (an OPML string) wins over ``opml_url`` when both are given.
    Without a source, or with an empty outline, no subscription is made.
    """
    parsed = opml.parse(opml_url) if opml_url is not None else None
    if data is not None:
        parsed = opml.from_string(data)

    for node in (parsed or []):
        feed_url = node.xmlUrl
        print(feed_url)
        subscribe_to_url(feed_url, user_id)
Example #34
0
def parse_opml(filename):
    """Append unseen YouTube subscriptions from an OPML file to the list.

    The first outline must be named 'YouTube Subscriptions'. Every entry
    whose text is not in the module-level ``feeds`` is written to
    ``SUBSCRIPTION_FILE`` as ``<text> : <xmlUrl>``.
    """
    global feeds
    subscriptions = opml.parse(filename)[0]
    assert subscriptions.text == 'YouTube Subscriptions'

    with open(SUBSCRIPTION_FILE, 'a+') as subscription_file:
        for entry in subscriptions:
            if entry.text in feeds:
                continue
            line = '{0:s} : {1:s}\n'.format(entry.text, entry.xmlUrl)
            subscription_file.write(line)
Example #35
0
def opml_import(request, url):
    """Create a ``Feed`` for every top-level entry of the OPML at ``url``.

    Returns an ``HttpResponse`` with body ``OK`` on success, or an error
    text when the document cannot be parsed.
    """
    try:
        o = opml.parse(url)
    except Exception:
        # Report parse or fetch failures without also trapping
        # KeyboardInterrupt / SystemExit as a bare except did.
        return HttpResponse('Cannot parse opml file %s' % url)
    for f in o:
        # objects.create() already saves the row, so no extra save().
        Feed.objects.create(url=f.xmlUrl, name=f.title)
    return HttpResponse('OK')
Example #36
0
def load_subs(f):
    """Read an exported YouTube subscription XML.

    Returns the list of ``xmlUrl`` values of the entries under the first
    outline.
    """
    return [channel.xmlUrl for channel in opml.parse(f)[0]]
Example #37
0
def index(request):
    """Render the subscription list as a flattened, sorted folder tree.

    The OPML file at ``settings.OPML_PATH`` is turned into nested dicts
    (``title`` / ``subs`` / ``subfolders``), sorted case-insensitively by
    title, and linearised into ``(kind, title, feed)`` tuples where kind
    is ``'folder'``, ``'sub'`` or ``'outdent'``.
    """
    outline = opml.parse(settings.OPML_PATH)

    opmlstruct = {'title': 'default', 'subs': [], 'subfolders': []}

    def handlesub(sub, subs, folders):
        # An outline with children is a folder; a leaf is a subscription.
        if len(sub) > 0:
            subf = {'title': sub.title, 'subs': [], 'subfolders': []}
            folders.append(subf)
            for s in sub:
                handlesub(s, subf['subs'], subf['subfolders'])
            return
        try:
            feedobj = Feed.objects.get(feedurl=sub.xmlUrl)
        except (AttributeError, Feed.DoesNotExist):
            # Leaves without an xmlUrl or without a stored Feed are left
            # out. Appending None, as before, broke the `.title` access
            # in the sorting and linearising steps below.
            return
        subs.append(feedobj)

    handlesub(outline, opmlstruct['subs'], opmlstruct['subfolders'])

    def sortfolders(ostruct):
        ostruct['subs'].sort(key=lambda x: x.title.lower())
        ostruct['subfolders'].sort(key=lambda x: x['title'].lower())
        for f in ostruct['subfolders']:
            sortfolders(f)

    sortfolders(opmlstruct)

    linearopml = []

    def linearizeopml(ostruct):
        # Folder header, nested folders (each closed by an outdent), then
        # the folder's own subscriptions.
        linearopml.append(('folder', ostruct['title'], None))
        for subf in ostruct['subfolders']:
            linearizeopml(subf)
            linearopml.append(('outdent', '', None))
        for sub in ostruct['subs']:
            linearopml.append(('sub', sub.title, sub))

    # The first subfolder is the OPML root itself; its contents are
    # emitted without a folder row for the root.
    struct_to_linearize = opmlstruct['subfolders'][0]
    for s in struct_to_linearize['subfolders']:
        linearizeopml(s)
        linearopml.append(('outdent', '', None))
    for s in struct_to_linearize['subs']:
        linearopml.append(('sub', s.title, s))

    context = {
        'sublist': linearopml
    }
    return render(request, 'feeds/index.html', context)
Example #38
0
def handleopml(path, stdout):
    """Walk the OPML file at ``path``, reporting folders and feeds.

    Folders are printed to ``stdout`` with one space of indent per
    nesting level; leaves are passed to ``handlefeed``.
    """
    def handlesub(sub, indent=0):
        indentch = ' ' * indent
        if len(sub) >= 1:
            print >> stdout, "%sFOLDER: %s" % (indentch, sub.title)
            for s in sub:
                handlesub(s, indent+1)
        else:
            handlefeed(sub, indentch, stdout)

    handlesub(opml.parse(path))
Example #39
0
def handleopml(path, stdout):
    """Traverse an OPML outline depth-first.

    An outline with children is printed to ``stdout`` as a FOLDER line
    and its children are visited one level deeper; an outline without
    children goes to ``handlefeed`` with the current indent string.
    """
    root = opml.parse(path)

    def handlesub(sub, indent=0):
        # One space per nesting level.
        indentch = ' ' * indent
        if len(sub) < 1:
            handlefeed(sub, indentch, stdout)
            return
        print >> stdout, "%sFOLDER: %s" % (indentch, sub.title)
        for child in sub:
            handlesub(child, indent + 1)

    handlesub(root)
def import_opml_file(logged_in_user, opml_file, mongo_lib):
    """Register the feeds of an OPML file and tag them for a user.

    Top-level feeds get the ``default`` tag. Feeds nested one level down
    are tagged with the text of their parent outline. Entries without an
    ``xmlUrl`` are not registered.
    """
    import opml
    default_tag = "default"
    outline = opml.parse(opml_file)
    for entry in outline:
        if hasattr(entry, "xmlUrl"):
            # A top-level feed has no folder, so it gets the default tag.
            add_feed_to_feeds_meta(mongo_lib, entry.xmlUrl, entry.title)
            associate_tags_to_user_feed(mongo_lib, logged_in_user,
                                        [default_tag], entry.xmlUrl)
        for ent in getattr(entry, '_outlines', []):
            # Apply the same xmlUrl guard as for top-level entries so a
            # nested folder does not raise AttributeError.
            if not hasattr(ent, "xmlUrl"):
                continue
            add_feed_to_feeds_meta(mongo_lib, ent.xmlUrl, ent.title)
            associate_tags_to_user_feed(mongo_lib, logged_in_user,
                                        [entry.text], ent.xmlUrl)
Example #41
0
def get_source_info():
    """
    Read the default OPML subscription list.

    Returns a dict with three parallel lists: ``titles`` (entry text),
    ``htmlUrls`` and ``xmlUrls``.
    """
    DEFAULT_OPML = "http://hosting.opml.org/dave/validatorTests/clean/subscriptionList.opml"
    entries = list(opml.parse(DEFAULT_OPML))
    return dict(
        htmlUrls=[entry.htmlUrl for entry in entries],
        titles=[entry.text for entry in entries],
        xmlUrls=[entry.xmlUrl for entry in entries],
    )
Example #42
0
File: models.py Project: Adyg/drood
    def import_pending(cls):
        """Print the title and feed URL of each outline for pending files.

        NOTE(review): every pending file currently parses the same
        hard-coded development path rather than its own
        ``opml_file.url``, and the status update that would mark files as
        running is disabled - confirm before relying on this.
        """
        pending_files = cls.objects.filter(import_status='P')

        # Marking the files as being imported (so another task does not
        # pick them up) is currently disabled:
        # pending_files.update(import_status='R')
        for pending_file in pending_files:
            #outline = opml.parse(pending_file.opml_file.url)
            outlines = opml.parse('/vagrant/drood/media/opml/2015/5/sarahmarshallfeedly.opml')
            for outline in outlines:
                # Call form of print: same output on Python 2, valid on
                # Python 3.
                print(outline.title)
                try:
                    print(outline.xmlUrl)
                except Exception:
                    # Outlines without a feed URL are skipped; narrower
                    # than the former bare except.
                    pass
Example #43
0
 def form2_success(request, appstruct):
     """Import every feed found in the uploaded OPML file.

     The outline is walked breadth-first: an element with an ``xmlUrl``
     is imported, any other element has its children queued. Returns a
     text with the number of imported feeds.
     """
     outline = opml.parse(appstruct['opml']['fp'])
     pending = list(outline)
     imported = 0
     while pending:
         node = pending.pop(0)
         if not hasattr(node, 'xmlUrl'):
             # Not a feed: queue its children instead.
             pending.extend(node)
             continue
         tasks.import_feed(request, node.xmlUrl)
         imported += 1
     return '%d feeds imported' % imported
Example #44
0
 def form2_success(request, appstruct):
     """Queue an import for each feed in the submitted OPML document.

     Elements carrying an ``xmlUrl`` are imported; elements without one
     are expanded into their children, which are processed after the
     elements already waiting. The result is a summary string.
     """
     opml_data = appstruct['opml']['fp']
     queue = list(opml.parse(opml_data))
     count = 0
     while queue:
         current = queue.pop(0)
         if hasattr(current, 'xmlUrl'):
             tasks.import_feed(request, current.xmlUrl)
             count += 1
             continue
         queue.extend(current)
     return '%d feeds imported' % count
Example #45
0
def get_article_words(opmlfile=None):
	"""Count the words of every distinct article in a set of feeds.

	Feed URLs come from the second outline level of ``opmlfile`` or,
	without a file, from the module-level ``defflist``. Entries lacking a
	title, link or description are ignored, as are entries whose title
	was already seen.

	Returns ``(allwords, articlewords, articletitles, articlelinks)``:
	global word counts, one word-count dict per article, the article
	titles in the same order, and a title -> link mapping.
	"""
	allwords = {}
	articlewords = []
	articletitles = []
	articlelinks = {}
	# Set twin of articletitles so the duplicate check is O(1) per article
	# instead of a scan of the whole title list.
	seen_titles = set()

	if opmlfile is not None:
		outline = opml.parse(opmlfile)
		feedlist = [feed.xmlUrl for folder in outline for feed in folder]
	else:
		feedlist = defflist

	for feed in feedlist:
		for e in feedparser.parse(feed).entries:
			if 'title' not in e or 'link' not in e or 'description' not in e:
				continue
			if e.title in seen_titles:
				continue
			seen_titles.add(e.title)

			txt = e.title.encode('utf8') + \
				strip_HTML(e.description.encode('utf8'))
			counts = {}
			for word in separate_words(txt):
				allwords[word] = allwords.get(word, 0) + 1
				counts[word] = counts.get(word, 0) + 1

			articlewords.append(counts)
			articletitles.append(e.title)
			articlelinks[e.title] = e.link

	return allwords, articlewords, articletitles, articlelinks
Example #46
0
def main(argv=None):
    """Process an OPML file for a user named on the command line.

    ``argv[1]`` is the username and ``argv[2]`` the OPML path
    (``sys.argv`` is used when ``argv`` is None). When the user id lookup
    raises ``IndexError`` the user is inserted and looked up again.
    """
    args = sys.argv if argv is None else argv
    username, opmlfile = args[1], args[2]

    try:
        user_id = get_user_id(username)
    except IndexError:
        db.insert('users', username=username)
        user_id = get_user_id(username)

    with open(opmlfile, 'r') as handle:
        outline = opml.parse(handle)

    process_opml(outline, user_id)
Example #47
0
def substract_subs(inputfile):
    """Extract the subscriptions under the first outline of an OPML file.

    Returns ``(urls, titles, subs)``: the feed URLs, the titles reduced
    to ASCII letters and digits, and the number of subscriptions.
    """
    channels = opml.parse(inputfile)[0]
    subs = len(channels)

    urls = []
    titles = []
    for position in range(subs):
        entry = channels[position]
        # Strip everything that is not an ASCII letter or digit.
        titles.append(re.sub(r'[^A-Za-z0-9]+', '', entry.text))
        urls.append(entry.xmlUrl)
    return (urls, titles, subs)
Example #48
0
def getUpdate():
    """Fetch every podcast feed listed in ``podcasts.xml`` and store the result.

    The data returned by ``Update_feeds.getFeeds`` for each feed URL is
    concatenated into one list, which is then handed to
    ``Update_feeds.writeFeeds``.
    """
    outline = opml.parse("podcasts.xml")

    logging.info("Getting Feeds")
    # A Pocket Casts export nests every podcast under a single top-level
    # outline whose text is "feeds"; otherwise the podcasts are top-level.
    podcasts = outline[0] if outline[0].text == "feeds" else outline

    json_data = []
    for podcast_feed in podcasts:
        json_data.extend(Update_feeds.getFeeds(podcast_feed.xmlUrl))

    Update_feeds.writeFeeds(json_data)
Example #49
0
def import_opml(opml_source):
    """Add every RSS outline found in ``opml_source`` as a feed.

    The parsed OPML tree is walked recursively.  A node whose ``type`` is
    ``'rss'`` is passed to ``add_feed`` (URL and title).  A node is only
    descended into when handling it raises ``AttributeError`` (for example
    because it has no ``type`` attribute).

    Returns the list of ``add_feed`` results in traversal order.
    """
    imported = []

    def walk(node):
        try:
            if node.type == 'rss':
                imported.append(add_feed(node.xmlUrl, node.title))
        except AttributeError:
            # Treat the node as a container and visit its children.
            if not len(node):
                return
            for child in node:
                walk(child)

    walk(opml.parse(opml_source))

    return imported
Example #50
0
    def handle(self, *args, **kwargs):
        """Import the feeds of an OPML file for an existing user.

        Expects exactly two positional arguments: the OPML file and the
        username.  Every top-level outline is passed to ``parse_outline``
        together with the user and a shared counter dict, and a one-line
        summary is written to ``self.stdout`` at the end.

        Raises ``CommandError`` when no user with that username exists.
        """
        opml_file, username = args

        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            raise CommandError('User {0} does not exist'.format(username))

        # Counters filled in by parse_outline; missing keys read as 0.
        stats = defaultdict(int)
        for outline in opml.parse(opml_file):
            parse_outline(outline, user, stats)

        summary = 'Imported %d new feeds from %s (%d already present)' % (
            stats['created'], opml_file, stats['existing'])
        self.stdout.write(summary)
Example #51
0
def main():
    """Read the OPML file named on the command line and print it as TOML.

    Each top-level outline is passed to ``print_outline`` with an empty
    prefix.  Any exception raised while parsing or printing is reported as
    ``ERROR: <message>`` and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description=
        'Read an OPML file and print spiderss TOML format to stdout.')
    parser.add_argument('file', help='OPML input file')
    opml_path = parser.parse_args().file

    try:
        for node in opml.parse(opml_path):
            print_outline(node, '')
    except Exception as e:
        print('ERROR: {}'.format(e))
        sys.exit(1)
def create_entries():
    """Create ``Feeds`` rows from a Google Reader subscriptions export.

    Top-level outlines that carry an ``xmlUrl`` become untagged feeds.  For
    any other outline, each child outline becomes a feed tagged with the
    parent's ``text``.  All rows are added to ``dbsession`` and committed in
    one transaction, after which the total number of ``Feeds`` rows is
    printed.
    """
    outline = opml.parse("/jchandrashekar/Projects/hqfeeds/google-reader-subscriptions.xml")
    # NOTE(review): default_tag is never used below.  The call is kept in
    # case constructing a Tag has side effects -- confirm and remove.
    default_tag = Tag("hqfeed_default")
    for entry in outline:
        if hasattr(entry, "xmlUrl"):
            # A feed that sits directly at the top level (no folder).
            f = Feeds()
            f.mongo_feed_id = entry.xmlUrl
            f.feed_title = entry.title
            dbsession.add(f)
            continue
        if hasattr(entry, '_outlines'):
            # A folder: every child is stored with the folder name as tag.
            for ent in entry._outlines:
                f = Feeds()
                f.mongo_feed_id = ent.xmlUrl
                f.feed_title = ent.title
                f.tags = [Tag(entry.text)]
                dbsession.add(f)
    dbsession.commit()
    # Single-argument print() emits the same text on Python 2 and Python 3;
    # the former print statement was a SyntaxError on Python 3.
    print("%s %s" % ("Total feeds created ", dbsession.query(Feeds).count()))
Example #53
0
def import_opml(store, opml_source):
    """Add every RSS outline found in ``opml_source`` to ``store``.

    The parsed OPML tree is walked recursively.  A node whose ``type`` is
    ``'rss'`` is passed to ``add_feed`` (store, URL and title) and the store
    is committed right after each feed.  A node is only descended into when
    handling it raises ``AttributeError`` (for example because it has no
    ``type`` attribute).

    Returns the list of ``add_feed`` results in traversal order.
    """
    imported = []

    def walk(node):
        try:
            if node.type == 'rss':
                new_feed = add_feed(store, node.xmlUrl, node.title)
                imported.append(new_feed)
                store.commit()
                # TODO: handle commit exceptions
        except AttributeError:
            # Treat the node as a container and visit its children.
            if not len(node):
                return
            for child in node:
                walk(child)

    walk(opml.parse(opml_source))

    return imported
Example #54
0
def opml_import():
    """Import feed sources from the OPML document named by the ``url`` argument.

    Reads ``url`` from the request's query arguments, parses it as OPML and
    adds a ``Source`` of kind ``'feed'`` for every outline that has an
    ``xmlUrl``; outlines without one are descended into recursively.  All
    sources are committed in a single transaction.

    Returns a plain error string when ``url`` is missing or cannot be
    parsed, otherwise a redirect to the referrer (or ``/``).
    """
    url = request.args.get('url')
    if not url:
        return 'Missing url'
    import opml
    # NOTE(security): ``url`` comes straight from the request and is handed
    # to the parser unvalidated.  If the parser resolves remote URLs or local
    # paths, this lets a caller make the server read arbitrary locations --
    # flagged for review, behavior intentionally left unchanged.
    try:
        o = opml.parse(url)
    except Exception:
        # Was a bare ``except:``, which also swallowed SystemExit and
        # KeyboardInterrupt; ordinary parse/IO errors are still reported.
        return 'Cannot parse opml file %s' % url

    def import_outline_element(o):
        # Add every feed below ``o``; anything without an xmlUrl is treated
        # as a container and walked recursively.
        for f in o:
            if hasattr(f, 'xmlUrl'):
                s = Source(f.title, 'feed', f.xmlUrl)
                db.session.add(s)
            else:
                import_outline_element(f)

    import_outline_element(o)
    db.session.commit()
    flash('import successed')
    return redirect(request.referrer or '/')
Example #55
0
def import_opml(user_id, path):
    """Import the feeds of an OPML file into the categories of a user.

    Top-level outlines with an ``xmlUrl`` are stored as feeds in the user's
    "Uncategorized" category, which is looked up on first use and created
    with ``order_id=9999`` when it does not exist.  Any other top-level
    outline is treated as a category named after its ``text`` (created when
    missing), and each of its children that has an ``xmlUrl`` is stored as a
    feed in that category.  Children without an ``xmlUrl`` (nested
    categories) are skipped with a warning.
    """
    _opml = opml.parse(path)

    uncategorized = None
    for outline in _opml:
        if hasattr(outline, 'xmlUrl'):
            if uncategorized is None:  # not resolved yet
                uncategorized = Category.query.filter_by(user_id=user_id, name="Uncategorized").first()
                if uncategorized is None:  # not found
                    uncategorized = Category(user_id, "Uncategorized", order_id=9999)
                    uncategorized.save()

            feed = Feed(outline.xmlUrl)
            feed.save()

            user_feed = UserFeed(user_id, uncategorized.id, feed.id, outline.text)
            user_feed.save()
            continue

        # No xmlUrl: the outline is a category holding feeds.
        category = Category.query.filter_by(user_id=user_id, name=outline.text).first()
        if category is None:
            category = Category(user_id, outline.text)
            category.save()

        for child in outline:
            if not hasattr(child, 'xmlUrl'):
                # Logger.warn is a deprecated alias; warning() is the
                # supported spelling.
                logger.warning("Nested category is not supported yet, ignored!")
                continue

            # NOTE(review): this passes an empty xxh64 hasher object, not a
            # digest of child.xmlUrl, so the lookup cannot match the URL.
            # The stored digest format is not visible here -- confirm how
            # Feed computes feed_url_hash and hash the URL the same way.
            url_hash = xxhash.xxh64()
            feed = Feed.query.filter_by(feed_url_hash=url_hash).first()
            if feed is None:
                feed = Feed(child.xmlUrl)
                feed.save()

            user_feed = UserFeed(user_id=user_id, category_id=category.id, feed_id=feed.id, feed_name=child.text)
            user_feed.save()
import opml
import MySQLdb
import ConfigParser

config = ConfigParser.RawConfigParser()
config.read('reader.cfg')

#import feedparser
#feedparser._HTMLSanitizer.acceptable_elements = feedparser._HTMLSanitizer.acceptable_elements + ['object', 'embed','iframe']
#import bs4

db=MySQLdb.connect(host="localhost",user=config.get('Database', 'username'),passwd=config.get('Database', 'password'),db="test_rss", charset='utf8')
cur = db.cursor()

opml_file = 'subscriptions.xml'
o=opml.parse(opml_file)

print 'reading ' + o.title + ' ...'

#create list of existing labels
cur.execute('SELECT * FROM reader_label')
labels = []
for item in cur.fetchall():
        labels.append(item[1])
#add labels
for item in o:
    if len(item) > 0:
        if item.title in labels:
            print 'LABEL EXISTS', item.title
        else:
            cur.execute('insert into reader_label (label) VALUES (%s)', item.title)
Example #57
0
File: meta.py Project: anwen/anwen
import opml
import sys

opml_file = 'feeds/Reabble-zh.opml'
opml_file = 'feeds/Kindle4RSS-Feeds.xml'
opml_file = 'feeds/feedly_askender.opml'

outline = opml.parse(opml_file)

# meta optional
# print(outline.title)
# print(outline.ownerName)
# print(outline.ownerEmail)
# info
# print(len(outline))

level = int(sys.argv[1])

assert len(outline)
for one in outline:
    if hasattr(one, 'type'):
        assert one.type == 'rss'

    # if not len(one):
    if level == 1 and not len(one):
        print()
        print('### {}'.format(one.text))
        if one.title != one.text:
            print(one.title)
            raise
        # 1级分类