Example #1
def extract_form_opml(request):
    BASE_DIR = os.path.dirname(os.path.dirname(__file__))
    print(BASE_DIR)
    bbc = listparser.parse(BASE_DIR + "/static/feeds.opml")
    sina = listparser.parse(BASE_DIR + "/static/sina_all_opml.xml")
    for feed in bbc.feeds:
        cat = RSSCategory.objects.filter(name=feed.title).first()
        if not cat:
            cat = RSSCategory(name=feed.title, publisher=bbc.meta.title)
            cat.save()
        if not RSSSourceList.objects.filter(url=feed.url):
            source = RSSSourceList(url=feed.url,
                                   category=cat,
                                   last_update=timezone.now())
            source.save()
    for feed in sina.feeds:
        cat = RSSCategory.objects.filter(name=feed.title).first()
        if not cat:
            cat = RSSCategory(name=feed.title, publisher=sina.meta.title)
            cat.save()
        if not RSSSourceList.objects.filter(url=feed.url):
            source = RSSSourceList(url=feed.url,
                                   category=cat,
                                   last_update=timezone.now())
            source.save()
    return JsonResponse({'status': True})
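A hedged aside on the example above: the two loops repeat the same check-then-create logic. Django's get_or_create can express the same idempotent insert; the model and field names below come from the example, everything else is assumed:

def import_parsed_feeds(parsed):
    # Sketch only: create each category/source once, reusing existing rows.
    for feed in parsed.feeds:
        cat, _ = RSSCategory.objects.get_or_create(
            name=feed.title,
            defaults={'publisher': parsed.meta.title},
        )
        RSSSourceList.objects.get_or_create(
            url=feed.url,
            defaults={'category': cat, 'last_update': timezone.now()},
        )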
Example #2
def import_opml(path, create_feed=True, create_entries=False):
    res = listparser.parse(path)
    ret = []
    for feed in res.feeds:
        flat_cats = []
        for cat_list in feed.categories:
            for cat in cat_list:
                flat_cats.append(cat)  # nested-nested categories? ew.

        # feeds can only have one category currently, use the last one found
        cat = flat_cats[-1]
        logger.debug("all known categories for this feed: %s" % flat_cats)
        logger.info("fetching or creating FeedGroup %r" % cat)
        feed_group, created = models.FeedGroup.objects.get_or_create(
            slug=slugify(cat))
        if created:
            logger.info("new feed group created %r" % feed_group)
        else:
            logger.info("found feed group %r" % feed_group)

        # pull the feed down
        success, data = logic.pull_feed(feed.url, create_feed, create_entries)
        ret.append((success, feed.url))
        if success:
            logger.info("successfully pulled feed, associating with group")
            # attach the feed group
            feed = data['feed']
            feed.group = feed_group
            feed.save()
        else:
            logger.warning("failed to pull feed, error was: %s", data)

    return ret
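As a side note, the nested loop that flattens feed.categories at the top of this example is equivalent to a single comprehension (a sketch of just that step, nothing else changed):

flat_cats = [cat for cat_list in feed.categories for cat in cat_list]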
Example #3
    def run(self):
        f = StringIO(self.opml_data.encode('utf-8'))

        opml_obj = listparser.parse(f)

        for feed in opml_obj.feeds:
            if not feed.tags:
               self.add_uncategorized_feed(feed) 

            for category in feed.tags:
                category_entry = self.get_or_set_category(category)         

                try:
                    feed_entry = self.get_or_set_feed(feed)
                except Exception:
                    self.failed_feeds += 1
                    continue

                if self.is_feed_in_category(feed_entry, category_entry):
                    continue

                category_entry.feeds.append(feed_entry)
        
        Feed.uncategorize_feeds()
        db.session.commit()

        ret = self.get_return_status()
        return ret
Example #4
 def parse(self, response):
     d = listparser.parse(response.body)
     feeds = d.feeds
     for feed in feeds:
         item = PodsearchbotItem()
         item['link'] = feed.url
         yield item
Example #5
    def import_feeds(self, source):
        """Tries to parse and import an opml file exported from another RSS reader. Will try
        to keep name and categories.

        Args:
            source (string): Path of the opml file.
        """
        result = listparser.parse(source)
        name = result.meta.title
        size = len(result.feeds)
        self.output.write_info(
            f"Do you want to import {size} feeds from {name}? [y]es/[n]o/[v]iew"
        )
        answer = input()
        if answer.lower() == "v" or answer.lower() == "view":
            for i in result.feeds:
                print(f"{i.title} : {i.url}")
        elif answer.lower() == "y" or answer.lower() == "yes":
            try:
                for i in result.feeds:
                    if self.verbose: print(f"Trying to add {i.title}")
                    if len(i.categories) > 0:
                        if self.verbose: print("Grabbing categories")
                        categories = i.categories[0]
                    else:
                        categories = []
                    self.add_feed(i.title, i.url, categories)
            except Exception as e:
                self.output.write_error(
                    f"Something went wrong when importing {i}!: {e}")
            else:
                self.output.write_ok("Feeds imported successfully.")
Example #6
 def OnImport(self, e):
     opml_result = listparser.parse(self.opml_file)
     for f in opml_result.feeds:
         print(f)
         print("Importing {} -> {}".format(f.title, f.url))
         db.feed.subscribe_feed(f.title, f.url, f.tags)
     self.Destroy()
Example #7
	def mass_Import_WX_ID_from_opml(self, opemlFile_or_Content_or_URL):
		'''
		listparser.parse(obj[, agent, etag, modified])
		Parameters:
			obj (file or string) – a file-like object or a string containing a URL, an absolute or relative filename, or an XML document
			agent (string) – User-Agent header to be sent when requesting a URL
			etag (string) – ETag header to be sent when requesting a URL
			modified (string or datetime) – Last-Modified header to be sent when requesting a URL
		'''
		opml = listparser.parse(opemlFile_or_Content_or_URL)
		for feed in opml.feeds:
			try:
				wx_id = re.findall(r"weixin\?id=(\S+)$", feed.url)[0]
			except IndexError:
				print("---- WX_ID Parse Error! %s" % feed.url)
				continue
			if not self.is_WX_ID_Exists(wx_id):
				WX_ID = Node("WX_ID")
				info = {
					"wx_id": wx_id,
					"name": feed.title,
					"group": feed.categories[0][0]
				}
				WX_ID.update(info)
				self.neo4j.create(WX_ID)
				print "++++ WX_ID Simple stored:\t%s" % wx_id
		return True
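For context on the docstring above, here is a minimal, self-contained sketch of the parse-and-iterate pattern used throughout these examples; the OPML string is invented for illustration:

import listparser

OPML = '''<?xml version="1.0"?>
<opml version="2.0">
  <head><title>Demo subscriptions</title></head>
  <body>
    <outline text="Example feed" title="Example feed"
             type="rss" xmlUrl="https://example.com/feed.xml"/>
  </body>
</opml>'''

result = listparser.parse(OPML)
if not result.bozo:                      # bozo is truthy when parsing failed
    print(result.meta.title)             # "Demo subscriptions"
    for feed in result.feeds:
        print(feed.title, feed.url, feed.categories)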
Example #8
    def handle(self, *args, **options):
        if not len(args) == 2:
            self.stdout.write('args must be <username> <opml url>')
            return

        try:
            d = lp.parse(args[1])
            user = User.objects.get(username=args[0])
        except Exception as e:
            print(str(e))
            return

        for feed in d.feeds:
            # change categories to tags
            categories = feed['categories'][0]
            tags = []
            for category in categories:
                if category == 'My Feeds':
                    continue
                tag, tag_created = Tag.objects.get_or_create(text=category, user=user)
                tags.append(tag)
            new_feed, created = Feed.objects.get_or_create(
                link=feed['url'],
                user=user
            )
            if created:
                new_feed.title = feed['title']
                new_feed.tags.add(*tags)
                new_feed.save()
            else:
                print('Already exists %d ' % new_feed.id)
            print(new_feed.title.encode('utf8'))
Example #9
async def opml_import(event: Union[events.NewMessage.Event, Message],
                      *_,
                      lang: Optional[str] = None,
                      **__):
    reply_message: Message = await event.get_reply_message()
    if not (event.is_private or event.is_channel
            and not event.is_group) and reply_message.sender_id != env.bot_id:
        return  # must reply to the bot in a group to import opml
    try:
        opml_file = await event.download_media(file=bytes)
    except Exception as e:
        await event.reply('ERROR: ' + i18n[lang]['fetch_file_failed'])
        logger.warning(f'Failed to get opml file from {event.chat_id}: ',
                       exc_info=e)
        return

    reply: Message = await event.reply(i18n[lang]['processing'] + '\n' +
                                       i18n[lang]['opml_import_processing'])
    logger.info(f'Got an opml file from {event.chat_id}')

    opml_d = listparser.parse(opml_file.decode())
    if not opml_d.feeds:
        await reply.edit('ERROR: ' + i18n[lang]['opml_parse_error'])
        return

    import_result = await inner.sub.subs(event.chat_id,
                                         tuple(feed.url
                                               for feed in opml_d.feeds),
                                         lang=lang)
    logger.info(f'Imported feed(s) for {event.chat_id}')
    await reply.edit(import_result["msg"], parse_mode='html')
Example #10
def import_opml(session, opml):
    feedlist = listparser.parse(opml)

    for f in feedlist.feeds:
        # skip entries without URLs
        if not hasattr(f, 'url'):
            continue

        # run a HEAD request against url to find out final URL, in case of any
        # redirects
        f.url = find_canonical_url(f.url)

        feed = session.query(model.Feed).filter_by(url=f.url).first()

        if feed:
            # feed url already present in database
            continue

        logger.debug(
            "Importing feed '{title}', URL '{url}, categories: {categories}'".
            format(
                title=f.title,
                url=f.url,
                categories=f.categories,
            )
        )

        feed = model.Feed(title=f.title, url=f.url, has_subscribers=True)
        session.add(feed)

    session.commit()
Example #11
 def _process_url(self):
     url = str(self.url)
     result = listparser.parse(url)
     if result["bozo"] == 1:
         return False
     self._process_result(result)
     return True
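A short note on the bozo flag checked here: listparser sets bozo (and bozo_exception) when the document could not be parsed. A minimal sketch, with a deliberately malformed input made up for illustration:

import listparser

bad = listparser.parse('<opml version="2.0"><body><outline')
if bad.bozo:
    print('parse failed:', bad.bozo_exception)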
Example #12
 def parse(self, response):
     d = listparser.parse(response.body)
     feeds = d.feeds
     for feed in feeds:
         item = PodsearchbotItem()
         item["link"] = feed.url
         yield item
Example #13
 def fn(self):
     doc = listparser._to_bytes("""<?xml version="1.0"?><rdf:RDF
             xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
             xmlns:foaf="http://xmlns.com/foaf/0.1/"
             xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
             xmlns:rss="http://purl.org/rss/1.0/">
             <foaf:Agent><foaf:name>&aacute;</foaf:name><foaf:weblog>
             <foaf:Document rdf:about="http://domain/"><rdfs:seeAlso>
             <rss:channel rdf:about="http://domain/feed" />
             </rdfs:seeAlso></foaf:Document></foaf:weblog></foaf:Agent>
             </rdf:RDF>""")
     idoc = listparser.Injector(listparser.BytesStrIO(doc))
     tmp = []
     while 1:
         i = idoc.read(size)
         if i:
             tmp.append(i)
         else:
             idoc.close()
             break
     xml = _to_unicode(listparser._to_bytes('').join(tmp))
     result = listparser.parse(xml)
     self.assertFalse(result.bozo)
     self.assertEqual(len(result.feeds), 1)
     self.assertEqual(ord(result.feeds[0].title), 225) # \u00e1
Example #14
def _parse_opml(text):
    result = {}
    result['items'] = items = []
    raw = listparser.parse(io.StringIO(text))
    bozo_exception = raw.get('bozo_exception')
    if bozo_exception:
        LOG.warning(f'Parse OPML {bozo_exception}')
    result['title'] = (raw['meta'] or {}).get('title')
    for feed in (raw['feeds'] or []):
        url = feed.get('url')
        title = feed.get('title')
        # ignore the title if it is itself a URL, e.g. rssant before v1.8 exported the text (title) field as the feed link
        if title and RE_URL.match(title):
            title = None
        # eg: {'url': '...', 'title': '...', 'categories': [['设计']], 'tags': ['设计']}
        categories = feed.get('categories')
        group = categories[0] if categories else None
        if group and isinstance(group, list):
            group = group[0]
        group = str(group) if group is not None else None
        if not url:
            continue
        url = _normalize_url(url)
        items.append(dict(
            title=title,
            group=group,
            url=url,
        ))
    total = len(result['items'])
    if total > IMPORT_ITEMS_LIMIT:
        LOG.warning(f'import {total} OPML feeds exceed limit {IMPORT_ITEMS_LIMIT}, will discard!')
        result['items'] = result['items'][:IMPORT_ITEMS_LIMIT]
    result = validate_opml(result)
    result['items'] = [x for x in result['items'] if x['url']]
    return result
Example #15
def get_feeds(feeds_file, max_age, max_feeds):
    opml = lp.parse(feeds_file)
    feeds = opml.feeds

    feeds = feeds[:max_feeds]
    
    md = Markdown()
    filename = "rssdigest.html"
    with open(filename, "w") as text_file:
        text_file.write(md.convert("# Daily RSS Digest \n----"))
    
    
    digeststring = "# Daily RSS Digest \n----\n\n"
    
    number_of_feeds = len(feeds)
    for index, feed in enumerate(feeds):
        feed = feedparser.parse(feed.url)
        feedstring = ""
        addfeed = False
    
        print("[" + str(index) + "/" + str(number_of_feeds) + "]")
    
        if 'title' in feed.feed:
            feedstring += "## " + feed.feed.title + "\n"
        
        for entry in feed.entries:
            localtime = time.localtime()
            try:
                publishedtime = entry.published_parsed
                # age in days
                age = (time.mktime(localtime) - time.mktime(publishedtime)) / 60 / 60 / 24
                if age < max_age:
                    feedstring += "## ["+entry.title+"]("+entry.link+")\n\n"
                    if 'description' in entry:
                        if len(entry.description) < 500:
                            feedstring += entry.description + "\n\n"
                    addfeed = True
            except:
                pass
    
        if not addfeed:
            print(feedstring + "No new posts\n")
    
        feedstring += "----\n"
    
        if addfeed:
            print(feedstring)
            # Append to string
            digeststring += feedstring
            # Append to file
            with open(filename, "a") as text_file:
                feedhtml = md.convert(feedstring)
                text_file.write(feedhtml)
    
    digesthtml = md.convert(digeststring)    

    # print("Final: " + digesthtml)

    return digesthtml
Example #16
 def worker(self, evals, testfile, etag, modified):
     if 'http' in testfile:
         testfile = 'http://localhost:8091/tests/' + testfile
     else:
         testfile = join('tests', testfile)
     result = listparser.parse(testfile, etag=etag, modified=modified)
     for ev in evals:
         self.assert_(eval(ev))
Example #17
def parse_opml(file):
    opml_feeds = [{
        "title": feed["title"],
        "url": feed["url"],
        "entries": [],
        "last_updated": None
    } for feed in listparser.parse(file)["feeds"]]
    return opml_feeds
Example #18
 def fill_feed_info(self, opml_file):
     """
     Import the file
     """
     parsed = listparser.parse(opml_file)
     for feed in parsed.feeds:
         print("Adding %s" % feed.url)
         Feed.objects.get_or_create(feed_url=feed.url)
Example #19
def parse(file_name):
    result = listparser.parse(file_name)
    rst = {}
    for feed in result.feeds:
        for tag in feed['tags']:
            i = [feed['title'], feed['url']]
            rst.setdefault(tag, []).append(i)
    return rst
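The rst.setdefault(tag, []).append(i) idiom above groups feeds by tag; the same grouping can be written with collections.defaultdict (a sketch under the same assumptions as the example):

from collections import defaultdict

rst = defaultdict(list)
for feed in result.feeds:
    for tag in feed['tags']:
        rst[tag].append([feed['title'], feed['url']])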
Example #20
 def testUserAgentGlobalOverride(self):
     url = 'http://localhost:8091/tests/http/useragent.xml'
     tmp = listparser.USER_AGENT
     listparser.USER_AGENT = "NewGlobalAgent"
     result = listparser.parse(url)
     listparser.USER_AGENT = tmp
     self.assertFalse(result.bozo)
     self.assert_(result.headers.get('x-agent') == "NewGlobalAgent")
Example #21
    def __init__(self, opml_file):

        print "reading opml file..."

        self.info = listparser.parse(opml_file)
        self.feeds = self.info.feeds

        print(self.info.meta.title)
        print(self.info.meta.created)
Example #22
 def clean_xml_file(self):
     xml_file = self.cleaned_data['xml_file']
     result = listparser.parse(xml_file)
     
     if result['bozo'] == 1:
         raise ValidationError(result['bozo_exception'])
     
     self.result = result
     return xml_file
Example #23
 def import_opml(self, opml_file):
     """
     Import feeds from an opml file
     
     Parameters
     ----------
     opml_file : string
         The relative path to the opml file that we want to import. 
     """
     print("importing " + opml_file)
     
     new_feeds = listparser.parse(opml_file)
     for feed in new_feeds.feeds:
         print(feed.url)
         self.add_podcast(feed.url)
Example #24
 def read_rss(self, url):
     utils.log('Read File: %s' % url,xbmc.LOGINFO)
     if url not in rss_cache:
         utils.log('File not in cache, requesting...',xbmc.LOGINFO)
         xml = httpget(url)
         progs = listparser.parse(xml, self.format)
         if not progs: return []
         d = []
         for entry in progs.entries:
             p = programme_simple(entry.id, entry)
             d.append(p)
         utils.log('Found %d entries' % len(d),xbmc.LOGINFO)
         rss_cache[url] = d
     else:
         utils.log('File found in cache',xbmc.LOGINFO)
     return rss_cache[url]
Example #25
    def handle(self, *args, **options):
        opml_file = open(args[0])
        opml = listparser.parse(opml_file)

        for feed in opml.feeds:
            print "%s: %s" % (feed.title, feed.url)
            feed_object = Feed.objects.create(name=feed.title, feed_url=feed.url)
            feed_object.save()
            for tag in feed.tags:
                # .get_or_create() with a name that begins with a number
                # (eg. '0-premium') causes .add() to break: "TypeError: int()
                # argument must be a string or a number, not 'Label'" so
                # we fetch the label again. Le sigh.
                label = Label.objects.get_or_create(name=tag)
                label = Label.objects.get(name=tag)
                feed_object.labels.add(label)
Example #26
def upload(request):
    #TODO sort out multi (implement file drop?)
    params = []
    if request.method == 'GET':
        print "Importing OPML file"
        filename = request.GET.get('filename') or './opml2.xml'
        d = listparser.parse(filename) #request.POST['filename']
        for f in d.feeds:
            print(f.title)
            feed = Feed(title=f.title)
            feed.url = f.url
            feed.user = request.user
            feed.save()

    params = {'Messages': ['Your import might have been a success!',]}
    return response(request, 'mainapp/index.html', params)
Example #27
 def start_parsing(self, dest_path,csv_checked, csv_first_line_header,plain_checked):
     try:
         
         td = self.unzip(self.path_dict[self.FIND_ZIP_DICT_KEY])
         
         files = self.search_path_files(td)
         
         flag_subscriptions_xml_found = False
         destination_filename = self.dest_filename+'_subscriptions.csv'
         
         for f in files:
             fileName, fileExtension = os.path.splitext(f)
             if fileName.find('subscriptions')>=0 and fileExtension == '.xml':
                 flag_subscriptions_xml_found = True
                 
                 if(csv_checked.get() == 1):
                         flag_header_written = False
                         with open(os.path.join(dest_path.get(),destination_filename),'wb') as csvfile:
                             csv_writer=csv.writer(csvfile,delimiter=',',quotechar='"',quoting=csv.QUOTE_ALL)
                             
                             parsedxml = listparser.parse(f)
                             
                             keys=parsedxml.feeds[0].keys()
                             
                             if(csv_first_line_header.get() == 1 and not flag_header_written):
                                 csv_writer.writerow(keys)
                                 flag_header_written = True
 
                             for e in parsedxml.feeds:
                                 row = []
                                 for k in keys:
                                     if type(e[k]) is list:
                                         row.append(self.list_to_string(e[k]))
                                     else:
                                         row.append(e[k])
                                 csv_writer.writerow(row)
                         
                         self.gui_logger("File written: "+destination_filename)
                 
         
         if flag_subscriptions_xml_found is False:
             self.gui_logger("Error: subscriptions.xml not found in zip file.")
                                 
         self.gui_logger("Done!")
     
     except BadZipfile:
         self.gui_logger("Error: File is not a zip file.")
Example #28
 def read_rss(self, url):
     #logging.info('Read RSS: %s', url)
     if url not in rss_cache:
         #logging.info('Feed URL not in cache, requesting...')
         xml = httpget(url)
         progs = listparser.parse(xml)
         if not progs: return []
         d = []
         for entry in progs.entries:
             pid = parse_entry_id(entry.id)
             p = programme(pid)
             d.append(p)
         #logging.info('Found %d entries', len(d))
         rss_cache[url] = d
     #else:
     #    logging.info('RSS found in cache')
     return rss_cache[url]
Example #29
 def read_rss(self, url):
     utils.log('Read RSS: %s' % url,xbmc.LOGINFO)
     if url not in rss_cache:
         utils.log('Feed URL not in cache, requesting...',xbmc.LOGINFO)
         xml = httpget(url)
         # utils.log("Received xml: %s" % xml,xbmc.LOGDEBUG)
         progs = listparser.parse(xml)
         if not progs: return []
         d = []
         for entry in progs.entries:
             pid = parse_entry_id(entry.id)
             p = programme_simple(pid, entry)
             d.append(p)
         utils.log('Found %d entries' % len(d),xbmc.LOGINFO)
         rss_cache[url] = d
     else:
         utils.log('RSS found in cache',xbmc.LOGINFO)
     return rss_cache[url]
Example #30
File: views.py Project: sqroc/venus
def upload_opml_file(request):
     UploadFile = request.FILES['file']
     result = listparser.parse(UploadFile.read())
     title = result.meta.title
     if title != '' :
         count = 0;
         for i in result.feeds:
             c1 = Category(uid=1, name=i.tags)
             c2 = Category.objects.filter(name=i.tags)
             if c2.count() == 0:
                 c1.save()
             else: 
                 c1.cid = c2[0].cid
             r1 = Rss(uid=1, cid=c1.cid, sitename=i.title, xmlurl=i.url, htmlurl='', updatetime=datetime.datetime.now())
             r1.save()
             count += 1
        
     return render_to_response('resultopml.html', locals())
Example #31
 def __read_rss (cls, url):
     #logging.info ('Read RSS: %s', url)
     if url not in RSS_CACHE:
         #logging.info ('Feed URL not in cache, requesting...')
         xml = httpget (url)
         progs = listparser.parse (xml)
         if not progs:
             return []
         cached_programmes = []
         for entry in progs.entries:
             pid = parse_entry_id (entry.identifier)
             programme = Programme (pid)
             cached_programmes.append (programme)
         #logging.info ('Found %d entries', len (d))
         RSS_CACHE[url] = cached_programmes
     #else:
     #    logging.info ('RSS found in cache')
     return RSS_CACHE[url]
Example #32
 def __read_rss(cls, url):
     #logging.info ('Read RSS: %s', url)
     if url not in RSS_CACHE:
         #logging.info ('Feed URL not in cache, requesting...')
         xml = httpget(url)
         progs = listparser.parse(xml)
         if not progs:
             return []
         cached_programmes = []
         for entry in progs.entries:
             pid = parse_entry_id(entry.identifier)
             programme = Programme(pid)
             cached_programmes.append(programme)
         #logging.info ('Found %d entries', len (d))
         RSS_CACHE[url] = cached_programmes
     #else:
     #    logging.info ('RSS found in cache')
     return RSS_CACHE[url]
Example #33
def parser_opml(handler):
    result = listparser.parse(handler)
    logger.debug(result)
    source = dict()

    for feed in result.feeds:
        group = feed.categories
        for g in group:
            if g[0].lower() == "must read":
                continue
            else:
                group = g[0]
        # print(group)
        if group not in source.keys():
            source[group] = list()

        source[group].append(dict(title=feed.title, url=feed.url))

    return source
Example #34
def read_opml(path):
    try:
        import listparser
    except Exception:
        raise errors.AnsibleFilterError('the "opml" filter requires the \
                "listparser" python module, install with `pip install \
                listparser`')

    try:
        result = listparser.parse(path)
    except Exception as e:
        raise errors.AnsibleFilterError('error while parsing opml file: "%s"' %
                                        str(e))

    feeds = result['feeds']
    for index, feed in enumerate(feeds):
        feeds[index]['folder'] = [item for sublist in feed.pop('categories')
                                  for item in sublist]
    return feeds
Example #35
 def import_opml(self, feed_url):
     feed = self.parse_feed(feed_url)
     success = []
     errors = []
     if 'opml' in feed['feed']:
         opml = listparser.parse(feed_url)
         for item in opml['feeds']:
             try:
                 feed = self.handle(item['url'])
                 success.append(feed)
             except (exceptions.FeedCriticalError, exceptions.TimeoutError) as exc:
                 errors.append((feed_url, exc))
     else:
         try:
             feed = self.handle(feed_url)
             success.append(feed)
         except (exceptions.FeedCriticalError, exceptions.TimeoutError) as exc:
             errors.append((feed_url, exc))
     return success, errors
Example #36
def import_opml(subscriptions, opml):
    """Import a list of subscriptions from an OPML file."""
    subscribed_feeds = []
    imported_feeds = listparser.parse(opml)
    # Load the list of currently subscribed feeds
    with open(subscriptions, 'r') as f:
        for line in f:
            feed = line.strip()
            if feed.startswith("#") or len(feed) == 0:
                continue
            subscribed_feeds.append(feed)
    # Import any feeds we're not already subscribed to
    with open(subscriptions, 'a') as f:
        for feed in imported_feeds.feeds:
            if feed.url not in subscribed_feeds:
                print("Importing " + feed.title + "...")
                subscribed_feeds.append(feed.url)
                f.write(feed.url + "\n")
    sys.exit()
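For reference, the plain-text subscriptions file this function reads and appends to would look something like the following; the format is inferred from the code above (one feed URL per line, blank lines and lines starting with '#' skipped), and the URLs are invented:

# personal subscriptions
https://example.com/feed.xml
https://example.org/atom.xml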
Example #37
def upload_opml():
    if 'username' in session:
        try:
            urls = []
            f = request.files['file']
            # parse opml file
            outline = lp.parse(f)
            for site in outline.feeds:
                urls.append({"url": site.url, "category" : site.categories[0][0]})
            # return a JSON list of feed URLs
            return {
                "status" : "ok",
                "feeds" : urls
            }
        except Exception as e:
            return {
                "status" : "error",
                "error" : str(e)
            }

    else:
        abort(401)
Example #38
def get_dead_feeds(filename, interval):
    fin = open(filename, 'r')
    opml = listparser.parse(fin)

    now = datetime.datetime.now()

    for f in opml.feeds:
        d = feedparser.parse(f.url)
        if 'title' in d.feed:
            if d.entries:
                entry = d.entries[0]
                date = get_date_word(entry)
                if date:
                    time_updated = datetime.datetime.fromtimestamp(time.mktime(entry[date]))
                    if now - time_updated > datetime.timedelta(days=interval):
                        print('MAYBE: The feed "{}" has not been modified in at least {} days. Url tried is {}'.format(f.title, interval, f.url))
                else:
                    print('MAYBE: The feed "{}"\'s most recent item has no information on when it was published. Url tried is {}'.format(f.title, f.url))
            else:
                print('DEAD: The feed "{}" appears to have zero posts. Url tried is {}'.format(f.title, f.url))
            
        else:
            print('DEAD: The feed "{}" is likely either dead or moved. Url tried is {}'.format(f.title, f.url))
Example #39
def import_opml_cmd(args):
    import listparser
    l = listparser.parse(args.file)

    forced_rivers = []
    if args.river:
        forced_rivers = args.river.split(',')

    for item in l.feeds:
        rivers = forced_rivers
        if len(rivers) == 0:
            rivers = ['Main']
            if len(item.categories) > 0:
                rivers = [
                    ('/'.join(c for c in cats))
                    for cats in item.categories
                ]
        for river_name in rivers:
            print("Importing feed %s to river %s" % (
                item.url,
                river_name,
            ))
            add_river_and_feed(args.user, river_name, item.url)
Example #40
def main():
    global NUM_VIDEOS
    global DESTINATION_FOLDER
    global API_KEY
    global FORMAT
    global FILE_FORMAT
    global SCHEDULING_MODE
    global SCHEDULING_MODE_VALUE


    number_of_runs_completed = 0
    did_i_just_complete_run = False
    minutes_to_wait = 0

    while True:
        print("Starting on run number %s" % number_of_runs_completed)
        logging.info("Starting on run number %s" % number_of_runs_completed)
        if SCHEDULING_MODE == "TIME_OF_DAY":
            logging.info("Evaluating time of day run for %s schedule mode" % SCHEDULING_MODE_VALUE)
            if did_i_just_complete_run:
                minutes_to_wait = 24 * 60
                logging.debug("  Just completed run, need to wait %s minutes" % minutes_to_wait)
                did_i_just_complete_run = False
            else:
                minutes_to_wait = (SCHEDULING_MODE_VALUE - datetime.now().hour) * 60
                if minutes_to_wait < 0:
                    minutes_to_wait += 24 * 60

                minutes_to_wait -= datetime.now().minute
                print("  First scheduled run set for %s minutes from now" % minutes_to_wait)

        elif SCHEDULING_MODE == "RUN_ONCE":
            logging.info("Evaluating run once schedule mode")
            if did_i_just_complete_run:
                logging.info("  Just completed run, ending")
                break
            else:
                logging.info("  Starting run once")

        elif SCHEDULING_MODE == "DELAY":
            logging.info("Evaluating delay schedule mode")
            if did_i_just_complete_run:
                minutes_to_wait = SCHEDULING_MODE_VALUE
                logging.info("  Next run in %s minutes" % minutes_to_wait)
            else:
                logging.info("  First run, doing it now")

        else:
            logging.info("Unknown SCHEDULING_MODE found %s" % SCHEDULING_MODE)
            #todo this should throw an exception
            break

        logging.info("Sleeping for %s minutes..." % minutes_to_wait)
        time.sleep(minutes_to_wait * 60)

        data = lp.parse("data/youtubeData.xml")

        # init for usage outside of this for loop
        xmltitle = [None] * len(data.feeds)
        xmlurl = [None] * len(data.feeds)
        channelIDlist = [None] * len(data.feeds)
        valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)

        for i in range(0, len(data.feeds)):
            xmltitle[i] = data.feeds[i].title  # channel Title
            xmlurl[i] = data.feeds[
                i].url  # formatted like 'https://www.youtube.com/feeds/videos.xml?channel_id=CHANNELID'
            indexofid = xmlurl[i].find("id=")
            channelIDlist[i] = xmlurl[i][indexofid + 3:]

        get_icons(xmltitle, channelIDlist)

        for i in range(0, len(xmltitle)):  # for every channel
            uploader = xmltitle[i]
            print(uploader)
            url_data = urlopen(xmlurl[i],)
            url_data = url_data.read()
            xml = bs(url_data.decode('utf-8'), 'html.parser')
            videoList = xml.find_all('entry')
            # print(xml.find_all('entry'))

            video_download_count = 0
            for v in videoList:  # for every video in channel
                # make sure we only download how many we want
                if video_download_count < NUM_VIDEOS:
                    skip_download = False
                    video_download_count += 1
                    title = str(v.title.string)
                    #title = title.decode("utf-8")
                    #temp = title.encode("ascii", errors="ignore").decode('utf-8', 'ignore')
                    title = title.encode("utf-8", errors="ignore").decode('utf-8', 'ignore')
                    escapes = '|'.join([chr(char) for char in range(1, 32)])
                    title = re.sub(escapes, "", title)          # removes all escape characters
                    title = title.replace("-", " ").replace("\\", "").replace("/", "")

                    upload_time = v.published.string.split('T')[1].split('+')[0].replace(':', '-')
                    upload_date = v.published.string.split('T')[0]
                    upload_date = upload_date + "_" + upload_time
                    url = v.link.get('href')
                    id = v.id.string
                    channelID = str(v.find('yt:channelid').contents[0])
                    # See if we already downloaded this
                    logFile = open(logFileName, 'r')
                    logFileContents = logFile.read()
                    logFile.close()
                    if id in logFileContents:
                        logging.info("Video Already downloaded for id %s" % id)
                        print("Video Already downloaded: " + id)
                    else:
                        filename_format = parseFormat(FILE_FORMAT, uploader, upload_date, title, channelID, id.replace("yt:video:", ""))
                        logging.debug("filename_formatted parsed to %s" % filename_format)

                        logging.info("Downloading - " + title + "  |  " + id)
                        logging.info("Channel - " + str(xmltitle[i]) + "  |  " + channelID)

                        if os.name == 'nt':  # if windows use supplied ffmpeg
                            ydl_opts = {
                                'outtmpl': 'Download/' + uploader + '/' + filename_format + '.%(ext)s',
                            # need to put channelid in here because what youtube-dl gives may be incorrect
                                #'simulate': 'true',
                                'writethumbnail': 'true',
                                'forcetitle': 'true',
                                'ffmpeg_location': './ffmpeg/bin/',
                                'format': FORMAT
                            }
                        else:
                            # not sure here
                            ydl_opts = {
                                'outtmpl': 'Download/' + uploader + '/' + filename_format + '.%(ext)s',
                                'writethumbnail': 'true',
                                'forcetitle': 'true',
                                'format': FORMAT
                            }
                        try:
                            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                                info_dict = ydl.extract_info(url, download=False)
                                video_id = info_dict.get("id", None)
                                video_title = info_dict.get("title", None)
                                video_date = info_dict.get("upload_date", None)
                                uploader = info_dict.get("uploader", None)
                                is_live = info_dict.get("is_live", None)
                                if 'entries' in info_dict:
                                    is_live = info_dict['entries'][0]["is_live"]
                                if not is_live:
                                    ydl.download([url])
                                else:
                                    print("Warning! This video is streaming live, it will be skipped")
                                    logging.info("Warning! This video is streaming live, it will be skipped")
                                    skip_download = True

                        except Exception as e:
                            print("Failed to Download")
                            skip_download = True
                            logging.error(str(e))
                            logging.error(traceback.format_exc())
                            logVariables()

                        if not skip_download:
                            subscription_source_dir = 'Download/' + uploader + '/'
                            subscription_destination_dir = os.path.join(DESTINATION_FOLDER, uploader)
                            logging.debug("subscription_source_dir is %s" % subscription_source_dir)
                            logging.debug("subscription_destination_dir is %s" % subscription_destination_dir)

                            #destinationDir = parseFormat(DESTINATION_FORMAT, uploader, upload_date, title, channelID, id)
                            #destinationDir = os.path.join(DESTINATION_FOLDER, destinationDir)

                            if not os.path.exists(DESTINATION_FOLDER + uploader):
                                logging.info("Creating uploader destination directory for %s" % subscription_destination_dir)
                                os.makedirs(subscription_destination_dir)
                            try:
                                logging.info("Now moving content from %s to %s" % (subscription_source_dir, subscription_destination_dir))

                                for filename in os.listdir(subscription_source_dir):
                                    logging.info("Checking file %s" % filename)
                                    source_to_get = os.path.join(subscription_source_dir, filename)
                                    where_to_place = subscription_destination_dir
                                    logging.info("Moving file %s to %s" % (source_to_get, where_to_place))
                                    safecopy(source_to_get, where_to_place)
                                    #shutil.move(os.path.join(subscription_source_dir, filename), subscription_destination_dir)

                                shutil.rmtree(subscription_source_dir, ignore_errors=True)
                                # shutil.move(videoName, destination + destVideoName)
                                # shutil.move(thumbName, destination + destThumbName)
                                # everything was successful so log that we downloaded and moved the video
                                logFile = open(logFileName, 'a')
                                logFile.write(id + ' \n')
                                logFile.close()
                            except Exception as e:
                                print("An error occured moving file")
                                logging.error(str(e))
                                logging.error(traceback.format_exc())
                                logVariables()
                            print()

            print()

        number_of_runs_completed += 1
        did_i_just_complete_run = True
        logging.info("Program main.py ended")
        logging.info("============================================================")
Example #41
        print(Fore.YELLOW + Style.BRIGHT + "MOVED TO {}".format(r.url) +
              Fore.RESET + Style.RESET_ALL)
        return None

    return r.text


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: {} OPML...".format(sys.argv[0]))
        sys.exit(1)

    for path in sys.argv[1:]:
        print(Fore.CYAN + Style.BRIGHT + "Checking OPML {}".format(path) +
              Fore.RESET + Style.RESET_ALL)
        result = listparser.parse(path)

        for feed in result.feeds:
            feed_text = get("feed", feed.title, feed.url)
            if feed_text is None:
                continue

            feed = feedparser.parse(feed_text)
            if feed.bozo:
                print(Fore.RED + Style.BRIGHT + "NG (FEED)" + Fore.RESET +
                      Style.RESET_ALL)
                continue

            #pprint(feed)
            latest = -1
            for entry in feed.entries:
Example #42
 def request_ompl(self):
     self.opml = listparser.parse(self.opml_url)
Example #43
def read_opml(filename):
    print "OPML: Reading OPML file: '%s'." % (filename,)
    result = listparser.parse(filename)
    urls = [ f.url for f in result.feeds ]
    #print urls
    return urls
Example #44
#!/usr/bin/env python
import listparser as lp

d = lp.parse("podcasts_opml.xml")

f = open('podcasts.org', 'w')
f.write("|-|-|\n")
f.write("|url|Title|\n")
f.write("|-|-|\n")
for podcast in d.feeds:
    f.write("|%s| %s|\n" % (podcast.url, podcast.title))
f.write("|-|-|\n")

f.close()
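With a single hypothetical podcast feed, the podcasts.org table written above would look like this (URL and title invented):

|-|-|
|url|Title|
|-|-|
|https://example.com/feed.xml| Example Podcast|
|-|-|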
Example #45
# Generate list of IDs
podcasts = driver.find_elements_by_xpath('//ol/li')
pod_ids = []
for pod in podcasts:
    pod_ids.append(pod.get_attribute("id").replace("draggable_", "", 1))

# Send them to /dev/null
for pod in pod_ids:
    # important time delay for server response
    time.sleep(0.5)
    driver.get("http://mysqueezebox.com/settings/podcasts/delete/" + pod)

# Load local OPML file
try:
    opml_file = open(opml_path)
    opml_cont = opml_file.read()
except IOError as e:
    print "I/O error({0}): {1}".format(e.errno, e.strerror)
    sys.exit(1)

pods = lp.parse(opml_cont)

# Create new subscription list, one entry at a time
print("Creating new subscription list from OPML file")
for feed in pods.feeds:
    element = driver.find_element_by_xpath('//input[@name="url"]')
    element.clear()
    element.send_keys(feed.url)
    driver.find_element_by_xpath('//*[@id="add_button"]').click()
    time.sleep(0.7)
Example #46
def opml():
    if ('action' in request.values
            and request.values['action'] in ['import', 'export']):
        if request.values['action'] == 'import':
            import_file = request.files.get('file')
            if not import_file or import_file.filename == '':
                flash('Warning: No File Selected')
            else:
                feed_list = listparser.parse(import_file)
                for feed in feed_list['feeds']:
                    url = feed['url']
                    if not Feed.get(url=url):
                        if 'title' in feed and feed['title']:
                            title = feed['title']
                        else:
                            parse = feedparser.parse(url)
                            if 'title' in parse.feed:
                                title = parse.feed.title
                            else:
                                title = url
                        new_feed = Feed(title=title, url=url)
                        for category_list in feed['categories']:
                            for title in category_list:
                                if title:
                                    category = Category.get(title=title)
                                    if not category:
                                        category = Category(title=title)
                                    new_feed.categories.add(category)
                flash('Feeds Imported!')
        elif request.values['action'] == 'export':
            root = etree.Element('opml', version='2.0')
            head = etree.SubElement(root, 'head')
            head_elements = {
                'title': 'feedfin OPML export',
                'dateCreated': format_datetime(datetime.utcnow()),
                'docs': 'http://dev.opml.org/spec2.html'
            }
            for element, text in head_elements.items():
                new_element = etree.SubElement(head, element)
                new_element.text = text
            body = etree.SubElement(root, 'body')
            for feed in Feed.select():
                new_element = etree.SubElement(
                    body,
                    'outline',
                    type='rss',
                    text=feed.title,
                    xmlUrl=feed.url,
                    category=','.join(
                        [category.title for category in feed.categories]))
            opml_bytes = etree.tostring(root,
                                        encoding='UTF-8',
                                        xml_declaration=True)
            response = make_response(opml_bytes.decode('utf-8'))
            response.headers['Content-Disposition'] = (
                'attachment; filename=feedfin.opml')
            return response

    else:
        flash('Warning: Invalid Request')

    return redirect(get_redirect_target())
Example #47
File: views.py Project: sqroc/venus
def upload_file(request):
     UploadFile = request.FILES['file']
     result = listparser.parse(UploadFile.read())
     title = result.meta.title
     return render_to_response('result.html', locals())
Example #48
        log.debug('Link={0} len(text)={1}'.format(
            entry.link, len(a.text)))
    except newspaper.article.ArticleException as e:
        log.warning('{0} {1}'.format(entry.link, e))

if __name__ == "__main__":
    urls = ['http://planet.scipy.org/rss20.xml',
            'http://planetpython.org/rss20.xml',
            'http://dsguide.biz/reader/feeds/posts']

    df = pd.read_csv('feeds.csv')
    df = df[df['Flag'] == 'Use']
    urls.extend(df['URL'].values)

    for f in os.listdir('opml'):
        if f.endswith('opml'):
            fname = os.path.join('opml', f)
            parsed_opml = listparser.parse(fname)
            urls.extend([feed.url for feed in parsed_opml.feeds])

    log = dl.log_api.conf_logger(__name__)
    config = core.init_config()

    corpus = dl.nlp.WebCorpus('sonar_corpus')

    for url in set(urls):
        rss = fp.parse(url)

        for entry in set(rss.entries):
            process_url(entry)
Example #49
 def _process_file(self):
     result = listparser.parse(self.file, "feedshare.net")
     if result["bozo"] == 1 and not result["feeds"]:
         return False
     self._process_result(result)
     return True
Example #50
#!pip install --upgrade pip
!pip install listparser

import listparser as lp

url = 'https://raw.githubusercontent.com/rushter/data-science-blogs/master/data-science.opml'
d = lp.parse(url)

len(d.feeds)
d.feeds[24].url
d.feeds[24].title

!pip install feedparser
import feedparser
import time

feed = feedparser.parse(d.feeds[24].url)
feed['feed']['title']
len(feed['entries'])


feed = feedparser.parse('http://dsguide.biz/reader/feeds/posts')
feed['feed']['title']
feed['entries'][1]
feed['entries'][1].title
feed['entries'][1].link
feed['entries'][1].summary
feed['entries'][3].published

dt = time.strptime(feed['entries'][3].published, '%a, %d %b %Y %H:%M:%S +0000')
Example #51
def channel_selection(dataFile,
                      inputFile="data/subscription_manager.xml",
                      titleList=None,
                      idList=None):
    logging.debug("channel_selection function called")
    if titleList is not None:
        inputFile = None
    else:
        titleList = []
        idList = []

    import listparser as lp
    logging.debug("Channel_selection started")
    # This function parses OPML data and allows the user to select which channels to be included
    write("Parsing Youtube data...\n", BLUE)
    all_channels = False
    loop = True
    while loop:
        write(
            "Would you like to select which channels you want to include, or do you want to include all of them?\n"
            "If you include all channels you can remove them manually by editing "
            + dataFile + " and deleting the"
            " entire line of the channel you do not want (Choose this option if you have a lot of subscriptions)"
        )
        selection = get_input(
            "Enter 'all' to keep all subscriptions or 'select' to select which channels (or 'a' or 's'):"
        ).lower()

        logging.debug("User selected %s for all or single channel selection" %
                      selection)
        if selection == 'all' or selection == 'a':
            all_channels = True
            loop = False
            write("Including all channels\n")
        elif selection == 'select' or selection == 's':
            all_channels = False
            loop = False
            write(
                "You will now be asked to select which channels you would like to include in your download library. \n"
                "Any channels you do not include will be ignored.\n")
        else:
            write("Invalid Selection!!! Try again.")
            logging.warning("User selected invalid entry")

    logging.debug("Opening " + dataFile + " for writing")
    file = open(dataFile, 'w')
    # logging.debug("Parsing " + inputFile)
    file.write('<opml version="1.1">\n<body>\n')

    if inputFile is not None:
        d = lp.parse(inputFile)
        l = d.feeds

        for count, channel in enumerate(l):
            #titleList[count] = channel.title
            #idList[count] = channel.url
            titleList.append(channel.title)
            idList.append(channel.url)
    else:
        for count, channel in enumerate(idList):
            idList[
                count] = "https://www.youtube.com/feeds/videos.xml?channel_id=" + idList[
                    count]
    num_channels = len(titleList)

    human_count = 1
    logging.debug("Processing channels")
    for count in range(0, num_channels):
        include_this_subscription = True
        title = titleList[count].replace('&', 'and')
        title = title.encode("ascii",
                             errors="ignore").decode('utf-8', 'ignore')
        url = bytes(idList[count], 'utf-8').decode('utf-8', 'ignore')

        logging.debug("Processing channel: %s" % title)
        logging.debug("Channel has url %s" % url)

        if all_channels:
            write("(%i/%i) Including subscription: %s\n" %
                  (human_count, num_channels, title))
            logging.info("Automatically including channel: %s" % title)

        if not all_channels:
            loop = True
            while loop:
                selection = get_input(
                    "(%i/%i) Include %s, yes or no (y/n)?" %
                    (human_count, num_channels, title)).lower()
                if selection == 'y' or selection == 'yes':
                    include_this_subscription = True
                    write("   Including %s\n" % title)
                    logging.info("User opted to include channel: %s" % title)
                    loop = False
                elif selection == 'n' or selection == 'no':
                    include_this_subscription = False
                    logging.info("User opted to not include channel: %s" %
                                 title)
                    loop = False
                else:
                    write("   Invalid response. Try again.", RED)

        human_count += 1

        if include_this_subscription:
            file.write('<outline title="' + xml.sax.saxutils.escape(title) +
                       '" xmlUrl="' + xml.sax.saxutils.escape(url) + '"/>\n')
        else:
            write("   Not including %s\n" % title)

    file.write('</body>\n</opml>')
    file.close()
    logging.debug("Channels saved to" + dataFile)
    write("\nComplete.")
def main():
    global NUM_VIDEOS
    global DESTINATION_FOLDER
    global API_KEY
    global FORMAT
    global FILE_FORMAT
    global SCHEDULING_MODE
    global SCHEDULING_MODE_VALUE
    global YOUTUBE_XML_FILE

    data = lp.parse(YOUTUBE_XML_FILE)

    my_filters = filters()

    # init for usage outside of this for loop
    xmltitle = [None] * len(data.feeds)
    xmlurl = [None] * len(data.feeds)
    channelIDlist = [None] * len(data.feeds)
    valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)

    for i in range(0, len(data.feeds)):
        xmltitle[i] = data.feeds[i].title  # channel Title
        xmlurl[i] = data.feeds[
            i].url  # formatted like 'https://www.youtube.com/feeds/videos.xml?channel_id=CHANNELID'
        indexofid = xmlurl[i].find("id=")
        channelIDlist[i] = xmlurl[i][indexofid + 3:]

    get_icons(xmltitle, channelIDlist)

    for i in range(0, len(xmltitle)):  # for every channel
        skip_download = False
        uploader = xmltitle[i]
        #print(uploader)
        try:
            url_data = urlopen(xmlurl[i], )
            url_data = url_data.read()
            xml = bs(url_data.decode('utf-8'), 'html.parser')
            videoList = xml.find_all('entry')
        except Exception as e:
            print("Failed to Download Channel list due to html error, check logs")
            videoList = ""
            skip_download = True
            logging.error(str(e))
            logging.error(traceback.format_exc())
            logVariables()

        video_download_count = 0
        for v in videoList:  # for every video in channel
            # make sure we only download how many we want
            if (video_download_count < NUM_VIDEOS) and not skip_download:
                skip_download = False
                skip_move = False
                video_download_count += 1

                title = str(v.title.string)
                #title = title.decode("utf-8")
                #temp = title.encode("ascii", errors="ignore").decode('utf-8', 'ignore')
                title = title.encode("utf-8", errors="ignore").decode('utf-8', 'ignore')
                escapes = '|'.join([chr(char) for char in range(1, 32)])
                title = re.sub(escapes, "", title)          # removes all escape characters
                title = title.replace("-", " ").replace("\\", "").replace("/", "").replace("%", "")

                upload_time = v.published.string.split('T')[1].split('+')[0].replace(':', '')[:-2]
                upload_date = v.published.string.split('T')[0]
                upload_date = upload_date + "_" + upload_time
                url = v.link.get('href')
                id = v.id.string
                channelID = str(v.find('yt:channelid').contents[0])
                # See if we already downloaded this
                logFile = open(logFileName, 'r')
                logFileContents = logFile.read()
                logFile.close()
                if id in logFileContents:
                    logging.info("Video Already downloaded for id %s" % id)
                    #print("Video Already downloaded: " + id)
                else:
                    if not my_filters.download_check(title, channelID):
                        #print("Video Filtered: " + title)
                        logging.info("Video Filtered: Title:" + title + "ChannelID:" + channelID)
                        skip_download = True
                        skip_move = True

                    filename_format = parseFormat(FILE_FORMAT, uploader, upload_date, title, channelID,
                        id.replace("yt:video:", ""))
                    logging.debug("filename_formatted parsed to %s" % filename_format)

                    if not skip_download:
                        logging.info("Downloading - " + title + "  |  " + id)
                        logging.info("Channel - " + str(xmltitle[i]) + "  |  " + channelID)
                        if os.name == 'nt':  # if windows use supplied ffmpeg
                            ydl_opts = {
                                'outtmpl': 'Download/' + uploader + '/' + filename_format + '.%(ext)s',
                                # need to put channelid in here because what youtube-dl gives may be incorrect
                                #'simulate': 'true',
                                'writethumbnail': 'true',
                                'forcetitle': 'true',
                                'ffmpeg_location': './ffmpeg/bin/',
                                'ignoreerrors': 'true',
                                'format': FORMAT
                            }
                        else:
                            # Non-Windows: no bundled ffmpeg, so rely on ffmpeg being available on PATH
                            ydl_opts = {
                                'outtmpl': 'Download/' + uploader + '/' + filename_format + '.%(ext)s',
                                'writethumbnail': 'true',
                                'forcetitle': 'true',
                                'format': FORMAT
                            }
                        try:
                            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                                info_dict = ydl.extract_info(url, download=False)
                                quality = info_dict.get("format", None)
                                print("Video Quality: " + quality)
                                video_id = info_dict.get("id", None)
                                video_title = info_dict.get("title", None)
                                video_date = info_dict.get("upload_date", None)
                                uploader = info_dict.get("uploader", None)
                                is_live = info_dict.get("is_live", None)
                                if 'entries' in info_dict:
                                    is_live = info_dict['entries'][0]["is_live"]
                                if not is_live:
                                    ydl.download([url])
                                else:
                                    print("Warning! This video is streaming live, it will be skipped")
                                    logging.info("Warning! This video is streaming live, it will be skipped")
                                    skip_move = True
                                    
                            if os.path.exists('Download/' + uploader + '/'):
                                for file in os.listdir('Download/' + uploader + '/'):
                                    if fnmatch.fnmatch(file, "*" + video_title + "*.part"):
                                        skip_move = True
                                        print("Failed to Download. Will Retry on next Run.")
                                        logging.error("Found .part file. Failed to Download. Will Retry next Run.")

                        except Exception as e:
                            print("Failed to Download")
                            skip_move = True
                            logging.error(str(e))
                            logging.error(traceback.format_exc())
                            logVariables()

                    if not skip_move:
                        subscription_source_dir = 'Download/' + uploader + '/'
                        subscription_destination_dir = os.path.join(DESTINATION_FOLDER, uploader)
                        logging.debug("subscription_source_dir is %s" % subscription_source_dir)
                        logging.debug("subscription_destination_dir is %s" % subscription_destination_dir)

                        #destinationDir = parseFormat(DESTINATION_FORMAT, uploader, upload_date, title, channelID, id)
                        #destinationDir = os.path.join(DESTINATION_FOLDER, destinationDir)

                        if not os.path.exists(subscription_destination_dir):
                            logging.info("Creating uploader destination directory for %s" % subscription_destination_dir)
                            os.makedirs(subscription_destination_dir)
                        try:
                            logging.info("Now moving content from %s to %s" % (subscription_source_dir, subscription_destination_dir))

                            for filename in os.listdir(subscription_source_dir):
                                logging.info("Checking file %s" % filename)
                                source_to_get = os.path.join(subscription_source_dir, filename)
                                where_to_place = subscription_destination_dir
                                logging.info("Moving file %s to %s" % (source_to_get, where_to_place))
                                safecopy(source_to_get, where_to_place)
                                #shutil.move(os.path.join(subscription_source_dir, filename), subscription_destination_dir)

                            shutil.rmtree(subscription_source_dir, ignore_errors=True)
                            # shutil.move(videoName, destination + destVideoName)
                            # shutil.move(thumbName, destination + destThumbName)
                            # everything was successful so log that we downloaded and moved the video
                            logFile = open(logFileName, 'a')
                            logFile.write(id + ' \n')
                            logFile.close()
                        except Exception as e:
                            print("An error occured moving file")
                            logging.error(str(e))
                            logging.error(traceback.format_exc())
                            logVariables()

            skip_download = False
            skip_move = False

    logging.info("Program main.py ended")
    logging.info("============================================================")
    return ""
Ejemplo n.º 55
0
import sys
import listparser, requests

if len(sys.argv) > 1:
	xml = sys.argv[1]
	opml = listparser.parse(xml)
	print('found %s feeds in %s' % (len(opml.feeds), xml))

	for feed in opml.feeds:
		req = requests.post("http://localhost:3000/channels", data={'url' : feed.url})
		print('[%s]' % req.status_code, feed.url)
else:
	print('no opml file specified.')
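
# Hypothetical invocation (the script name is assumed):
#   python import_opml.py subscriptions.opml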
Ejemplo n.º 56
0
def channel_selection():
    import listparser as lp
    logging.debug("Channel_selection started")
    # This function parses OPML data and allows the user to select which channels to be included
    print("Parsing Youtube data\n")
    all_channels = False
    loop = True
    while loop:
        selection = get_input(
            "Would you like to select which channels you want to include, or do you want to include all of them?\n"
            "If you include all channels you can remove them manually by editing data/youtubeData.xml and deleting the"
            " entire line of the channel you do not want (Choose this option if you have a lot of subscriptions)\n"
            "Enter 'all' to keep all subscriptions or 'select' to select which channels (or 'a' or 's'):").lower()

        logging.debug("User selected %s for all or single channel selection" % selection)
        if selection == 'all' or selection == 'a':
            all_channels = True
            loop = False
            print("Including all channels\n")
        elif selection == 'select' or selection == 's':
            all_channels = False
            loop = False
            print(
                "You will now be asked to select which channels you would like to include in your download library. \nAny"
                " channels you do not include will be ignored. \nWarning: if you add a new subscription you must go through this"
                " process again (until I add a feature to import a channel)\n")
        else:
            print("Invalid Selection!!! Try again.")
            logging.warning("User selected invalid entry")

    logging.debug("Opening data/youtubeData.xml for writing")
    file = open("data/youtubeData.xml", 'w')
    logging.debug("Parsing data/subscription_manager.xml")
    d = lp.parse('data/subscription_manager.xml')
    l = d.feeds
    file.write('<opml version="1.1">\n<body>\n')
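    # The file being built is a minimal OPML document: one
    # <outline title="..." xmlUrl="..."/> element per kept channel, closed with
    # </body></opml> at the end, so listparser can re-parse it later.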
    num_channels = len(l)
    human_count = 1

    logging.debug("Processing channels")
    for channel in l:
        include_this_subscription = True
        title = channel.title.replace('&', 'and')
        title = title.encode("ascii", errors="ignore").decode('utf-8', 'ignore')  # keep the '&' -> 'and' substitution
        url = bytes(channel.url, 'utf-8').decode('utf-8', 'ignore')

        logging.debug("Processing channel: %s" % title)
        logging.debug("Channel has url %s" % url)

        if all_channels:
            print("(%i/%i) Including subscription: %s\n" % (human_count, num_channels, title))
            logging.info("Automatically including channel: %s" % title)

        if not all_channels:
            loop = True
            while loop:
                selection = get_input(
                    "(%i/%i) Include %s, yes or no (y/n)?" % (human_count, num_channels, title)).lower()
                if selection == 'y' or selection == 'yes':
                    include_this_subscription = True
                    print("   Including %s\n" % title)
                    logging.info("User opted to include channel: %s" % title)
                    loop = False
                elif selection == 'n' or selection == 'no':
                    include_this_subscription = False
                    logging.info("User opted to not include channel: %s" % title)
                    loop = False
                else:
                    print("   Invalid response. Try again.")

        human_count += 1

        if include_this_subscription:
            file.write('<outline title="' + xml.sax.saxutils.escape(title) + '" xmlUrl="' + xml.sax.saxutils.escape(
                url) + '"/>\n')
        else:
            print("   Not including %s\n" % title)

    file.write('</body>\n</opml>')
    file.close()
    logging.debug("Channels saved to youtubeData.xml")
    print("\nComplete.")
Ejemplo n.º 57
0
def main(my_sch):
    global NUM_VIDEOS
    global DESTINATION_FOLDER
    global API_KEY
    global FORMAT
    global FILE_FORMAT
    global SCHEDULING_MODE
    global SCHEDULING_MODE_VALUE
    global YOUTUBE_XML_FILE

    data = lp.parse(YOUTUBE_XML_FILE)
    logFileName = "data/log.txt"

    my_filters = filters()

    # init for usage outside of this for loop
    xmltitle = [None] * len(data.feeds)
    xmlurl = [None] * len(data.feeds)
    channelIDlist = [None] * len(data.feeds)
    valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)

    for i in range(0, len(data.feeds)):
        xmltitle[i] = slugify(data.feeds[i].title)  # channel Title
        xmlurl[i] = data.feeds[
            i].url  # formatted like 'https://www.youtube.com/feeds/videos.xml?channel_id=CHANNELID'
        indexofid = xmlurl[i].find("id=")
        channelIDlist[i] = xmlurl[i][indexofid + 3:]

    if my_sch.getNumRuns() == 1:
        get_icons(xmltitle, channelIDlist)

    for i in range(0, len(xmltitle)):  # for every channel
        skip_download = False
        uploader = xmltitle[i]
        # print(uploader)
        try:
            url_data = urlopen(xmlurl[i], )
            url_data = url_data.read()
            xml = bs(url_data.decode('utf-8'), 'html.parser')
            videoList = xml.find_all('entry')
        except Exception as e:
            print(Fore.RED + "Failed to Download Channel list due to html error, check logs" + Style.RESET_ALL)
            videoList = ""
            skip_download = True
            logging.error(str(e))
            logging.error(traceback.format_exc())
            logVariables()

        video_download_count = 0
        for v in videoList:  # for every video in channel
            # make sure we only download how many we want
            if (video_download_count < NUM_VIDEOS) and not skip_download:
                skip_download = False
                skip_move = False
                video_download_count += 1

                title = str(v.title.string)
                # title = title.decode("utf-8")
                # temp = title.encode("ascii", errors="ignore").decode('utf-8', 'ignore')
                title = title.encode("utf-8", errors="ignore").decode('utf-8', 'ignore')
                escapes = '|'.join([chr(char) for char in range(1, 32)])
                title = re.sub(escapes, "", title)  # removes all escape characters
                title = title.replace("-", " ").replace("\\", "").replace("/", "").replace("%", "")
                title = slugify(title)

                upload_time = v.published.string.split('T')[1].split('+')[0].replace(':', '')[:-2]
                upload_date = v.published.string.split('T')[0]
                upload_date = upload_date + "_" + upload_time
                url = v.link.get('href')
                id = v.id.string
                channelID = str(v.find('yt:channelid').contents[0])
                # See if we already downloaded this
                logFile = open(logFileName, 'r')
                logFileContents = logFile.read()
                logFile.close()
                if id in logFileContents:
                    logging.info("Video Already downloaded for id %s" % id)
                    # print("Video Already downloaded: " + id)
                else:
                    if not my_filters.download_check(title, channelID):
                        # print("Video Filtered: " + title)
                        logging.info("Video Filtered: Title:" + title + "ChannelID:" + channelID)
                        skip_download = True
                        skip_move = True

                    filename_format = parseFormat(FILE_FORMAT, uploader, upload_date, title, channelID,
                        id.replace("yt:video:", ""))
                    logging.debug("filename_formatted parsed to %s" % filename_format)

                    if not skip_download:
                        logging.info("Downloading - " + title + "  |  " + id)
                        logging.info("Channel - " + str(xmltitle[i]) + "  |  " + channelID)

                        # Get format codes to use
                        usable_extension = 'webm'
                        # usable_format_code_video = 'bestvideo[ext=webm]'
                        # usable_format_code_audio = 'bestaudio'
                        containsWebmContent = False

                        usable_format_code_audio = '(bestaudio[ext=m4a]/bestaudio)'
                        usable_format_code_video = '(bestvideo[vcodec^=av01][height>=2160][fps>30]/' \
                                              'bestvideo[vcodec=vp9.2][height>=2160][fps>30]/' \
                                              'bestvideo[vcodec=vp9][height>=2160][fps>30]/' \
                                              'bestvideo[vcodec^=av01][height>=2160]/' \
                                              'bestvideo[vcodec=vp9.2][height>=2160]/' \
                                              'bestvideo[vcodec=vp9][height>=2160]/' \
                                              'bestvideo[height>=2160]/' \
                                              'bestvideo[vcodec^=av01][height>=1440][fps>30]/' \
                                              'bestvideo[vcodec=vp9.2][height>=1440][fps>30]/' \
                                              'bestvideo[vcodec=vp9][height>=1440][fps>30]/' \
                                              'bestvideo[vcodec^=av01][height>=1440]/' \
                                              'bestvideo[vcodec=vp9.2][height>=1440]/' \
                                              'bestvideo[vcodec=vp9][height>=1440]/' \
                                              'bestvideo[height>=1440]/' \
                                              'bestvideo[vcodec^=av01][height>=1080][fps>30]/' \
                                              'bestvideo[vcodec=vp9.2][height>=1080][fps>30]/' \
                                              'bestvideo[vcodec=vp9][height>=1080][fps>30]/' \
                                              'bestvideo[vcodec^=av01][height>=1080]/' \
                                              'bestvideo[vcodec=vp9.2][height>=1080]/' \
                                              'bestvideo[vcodec=vp9][height>=1080]/' \
                                              'bestvideo[height>=1080]/' \
                                              'bestvideo[vcodec^=av01][height>=720][fps>30]/' \
                                              'bestvideo[vcodec=vp9.2][height>=720][fps>30]/' \
                                              'bestvideo[vcodec=vp9][height>=720][fps>30]/' \
                                              'bestvideo[vcodec^=av01][height>=720]/' \
                                              'bestvideo[vcodec=vp9.2][height>=720]/' \
                                              'bestvideo[vcodec=vp9][height>=720]/' \
                                              'bestvideo[height>=720]/' \
                                              'bestvideo)'
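                        # youtube-dl format selection: '/' separates fallbacks tried left to
                        # right, so the selector above prefers AV1, then VP9.2 (HDR), then VP9
                        # at each resolution tier (2160p, 1440p, 1080p, 720p, high-fps first)
                        # before falling back to plain bestvideo; the audio default prefers
                        # m4a, then any bestaudio.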

                        try:
                            if FORMAT.split(" ")[0] == 'best':
                                logging.info("Skipping getting format codes using granulated option")
                            else:
                                with youtube_dl.YoutubeDL() as ydl:
                                    info_dict = ydl.extract_info(url, download=False)
                                    formats = info_dict.get("formats", None)
                                    for f in formats:
                                        note = f.get('format_note')
                                        fID = f.get('format_id')
                                        extension = f.get('ext')

                                        if FORMAT.split(" ")[0] == note:
                                            usable_format_code_video = fID
                                            usable_extension = extension
                                            containsWebmContent = True
                                            break

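                                    # Second pass: pick an audio-only stream whose container
                                    # matches the chosen video stream, so the merged output
                                    # stays in a single container without re-encoding.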
                                    for f in formats:
                                        note = f.get('format_note')
                                        fID = f.get('format_id')
                                        extension = f.get('ext')

                                        if usable_extension == extension and note == 'audio only':
                                            usable_format_code_audio = fID

                                if not containsWebmContent:
                                    usable_format_code_video = 'bestvideo'
                                    usable_format_code_audio = 'bestaudio'

                        except Exception as e:
                            logging.error(str(e))
                            if str(e) == "ERROR: This video is unavailable.":
                                logging.error("This video is not available for download, "
                                              "maybe streaming or just an announcement post.")
                                write("This video is not available for download, "
                                      "maybe streaming or just an announcement post.", RED)
                                skip_download = True
                                skip_move = True
                            else:
                                logging.error("An error occurred trying to find user requested format,"
                                              " reverting to best")
                                usable_format_code_video = 'bestvideo'
                                usable_format_code_audio = 'bestaudio'
                                write("Couldn't find request format for this video, defaulting to best", RED)

                    if not skip_download:
                        if os.name == 'nt':  # if windows use supplied ffmpeg
                            ydl_opts = {
                                'outtmpl': os.path.join('Download', uploader, filename_format + '.%(ext)s'),
                                # need to put channelid in here because what youtube-dl gives may be incorrect
                                # 'simulate': 'true',
                                'writethumbnail': 'true',
                                'forcetitle': 'true',
                                'ffmpeg_location': './ffmpeg/bin/',
                                'ignoreerrors': 'true',
                                'format': usable_format_code_video + "+" + usable_format_code_audio + '/best'
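                                # '+' asks youtube-dl to merge the selected video and audio
                                # streams into one file (this needs ffmpeg); the trailing
                                # '/best' falls back to the best pre-muxed format if the
                                # combined selector cannot be satisfied.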
                            }
                        else:
                            # Linux/Unix
                            ydl_opts = {
                                'outtmpl': os.path.join('Download', uploader, filename_format + '.%(ext)s'),
                                'writethumbnail': 'true',
                                'forcetitle': 'true',
                                'format': usable_format_code_video + "+" + usable_format_code_audio + '/best'
                            }
                        try:
                            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                                info_dict = ydl.extract_info(url, download=False)
                                quality = info_dict.get("format", None)
                                write("Video Quality: " + quality, BLUE)
                                video_id = info_dict.get("id", None)
                                video_title = info_dict.get("title", None)
                                video_date = info_dict.get("upload_date", None)
                                is_live = info_dict.get("is_live", None)
                                if 'entries' in info_dict:
                                    is_live = info_dict['entries'][0]["is_live"]
                                if not is_live:
                                    ydl.download([url])
                                else:
                                    write("Warning! This video is streaming live, it will be skipped", RED)
                                    logging.info("Warning! This video is streaming live, it will be skipped")
                                    skip_move = True

                            if os.path.exists('Download/' + uploader + '/'):
                                for file in os.listdir('Download/' + uploader + '/'):
                                    if fnmatch.fnmatch(file, "*" + video_title + "*.part"):
                                        skip_move = True
                                        write("Failed to Download. Will Retry on next Run.", RED)
                                        logging.error("Found .part file. Failed to Download. Will Retry next Run.")

                        except Exception as e:
                            skip_move = True
                            logging.error(str(e))

                            if str(e) == "ERROR: This video is unavailable.":
                                logging.error("This video is not available for download, "
                                              "maybe streaming or just an announcement post.")
                                write("This video is not available for download, "
                                      "maybe streaming or just an announcement post.", RED)
                            else:
                                logging.error("Failed to download video")
                                write("Failed to Download", RED)
                                logging.error(traceback.format_exc())
                                logVariables()

                    if not skip_move:
                        destinationDir = parseFormat(DESTINATION_FORMAT, uploader, upload_date, title, channelID, id)
                        destinationDir = os.path.join(DESTINATION_FOLDER, destinationDir)

                        subscription_source_dir = 'Download/' + uploader + '/'
                        logging.debug("subscription_source_dir is %s" % subscription_source_dir)
                        logging.debug("subscription_destination_dir is %s" % destinationDir)

                        if not os.path.exists(destinationDir):
                            logging.info(
                                "Creating uploader destination directory for %s" % destinationDir)
                            os.makedirs(destinationDir)
                        try:
                            logging.info("Now moving content from %s to %s" % (
                            subscription_source_dir, destinationDir))

                            for filename in os.listdir(subscription_source_dir):
                                logging.info("Checking file %s" % filename)
                                source_to_get = os.path.join(subscription_source_dir, filename)
                                logging.info("Moving file %s to %s" % (source_to_get, destinationDir))
                                safecopy(source_to_get, destinationDir)
                                # shutil.move(os.path.join(subscription_source_dir, filename), subscription_destination_dir)

                            shutil.rmtree(subscription_source_dir, ignore_errors=True)
                            # shutil.move(videoName, destination + destVideoName)
                            # shutil.move(thumbName, destination + destThumbName)
                            # everything was successful so log that we downloaded and moved the video
                            logFile = open(logFileName, 'a')
                            logFile.write(id + ' \n')
                            logFile.close()
                            logging.info("Successfully downloaded and moved file")
                            write("Success!", GREEN)
                        except Exception as e:
                            print(str(e))
                            write("An error occured moving file", RED)
                            logging.error(str(e))
                            logging.error(traceback.format_exc())
                            logVariables()

            skip_download = False
            skip_move = False

    logging.info("Program main.py ended")
    logging.info("============================================================")
    return ""
Ejemplo n.º 58
0
    def create_from_file(cls, file):
        opml = listparser.parse(file)
        print('found %s feeds' % (len(opml.feeds)))
        for feed in opml.feeds:
            cls.create(url=feed.url, title=feed.title)
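        # Hypothetical usage, assuming this is a classmethod on a Feed-like model
        # whose .create(url=..., title=...) persists one record per feed:
        #   Feed.create_from_file('subscriptions.opml')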