Example #1
 def collect_feedback(cls):
     seen_posts = set()
     try:
         data = urllib2.urlopen('https://forum.newsblur.com/posts.json').read()
     except (urllib2.HTTPError), e:
         logging.debug(" ***> Failed to collect feedback: %s" % e)
         return
Example #2
 def test_dont_query_myself(self):
     log.debug('test start')
     self.lookup.start()
     # Ongoing queries to (sorted: oldest first):
     # 155-4, 157-3, 
     # Queued nodes to query (sorted by log_distance to info_hash):
     # 158-1, 159-0
     # Notice 159-2 is kicked out from the queue
     eq_(self.lookup.num_parallel_queries, 2)
     nodes = [Node(tc.CLIENT_ADDR, self.lookup._my_id)]
     self.lookup._on_response(*_gen_nodes_args(
             tc.NODES_LD_IH[157][3],
             nodes))
     eq_(self.lookup._get_announce_candidates(),
         [tc.NODES_LD_IH[157][3],
          ])
     # This response triggers a new query to 158-1 (ignoring myself)
     eq_(self.lookup.num_parallel_queries, 2)
     # Ongoing queries to (sorted: oldest first):
     # 155-4, 158-1
     # Queued nodes to query (sorted by log_distance to info_hash):
     # 159-0
     self.lookup._on_timeout(tc.NODES_LD_IH[155][4])
     # This timeout triggers a new query (to 159-0)
     eq_(self.lookup.num_parallel_queries, 2) 
     self.lookup._on_timeout(tc.NODES_LD_IH[158][1])
     # No more nodes to send queries to
     eq_(self.lookup.num_parallel_queries, 1)
     ok_(not self.lookup.is_done)
     self.lookup._on_timeout(tc.NODES_LD_IH[159][0]) 
     # No more nodes to send queries to
     eq_(self.lookup.num_parallel_queries, 0)
     ok_(self.lookup.is_done)
Example #3
    def test_different_delay(self):
        # NOTICE: this test might fail if your configuration
        # (interpreter/processor) is too slow
        
        task_delays = (1, 1, 1, .5, 1, 1, 2, 1, 1, 1,
                       1, 1.5, 1, 1, 1, 1, .3)
                       
        expected_list = ([],
                         ['a', 16, 3, 'b'], #9 is cancelled
                         ['a', 0, 1, 2, 4, 5, 7, 8, 10, 12, 13, 15, 'c', 'b'],
                         ['a', 11, 'c', 'b'],
                         ['a', 6, 'c', 'b'],
            )
        tasks = [Task(delay, self.callback_f, i) \
                 for i, delay in enumerate(task_delays)]
        for task in tasks:
            self.task_m.add(task)

        for i, expected in enumerate(expected_list):
            while True:
                task = self.task_m.consume_task()
                if task is None:
                    break
                task.fire_callbacks()
            log.debug('#: %d, result: %s, expected: %s' % (i,
                                              self.callback_order, expected))
            assert self.callback_order == expected
            self.callback_order = []
            self.task_m.add(Task(0, self.callback_f, 'a'))
            self.task_m.add(Task(.5, self.callback_f, 'b'))
            self.task_m.add(Task(1, self.callback_f, 'c'))
            time.sleep(.5)
            tasks[9].cancel() # too late (already fired) 
            tasks[14].cancel() # should be cancelled
Example #4
 def test_cancel(self):
     for i in xrange(5):
         self.task_m.add(Task(.1, self.callback_f, i))
     c_task = Task(.1, self.callback_f, 5)
     self.task_m.add(c_task)
     for i in xrange(6,10):
         self.task_m.add(Task(.1, self.callback_f, i))
     while True:
         task = self.task_m.consume_task()
         if task is None:
             break
         task.fire_callbacks()
     log.debug('%s' % self.callback_order)
     assert self.callback_order == []
     ok_(not c_task.cancelled)
     c_task.cancel()
     ok_(c_task.cancelled)
     
     time.sleep(.1)
     while True:
         task = self.task_m.consume_task()
         if task is None:
             break
         task.fire_callbacks()
     log.debug('%s' % self.callback_order)
     assert self.callback_order == [0,1,2,3,4,  6,7,8,9]
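
(The two task-manager tests above, test_different_delay and test_cancel, exercise a Task/TaskManager pair. The following is a minimal illustrative sketch of the interface they assume, with add, consume_task, fire_callbacks and cancel inferred from the assertions; it is not the project's actual implementation.)

import time


class Task(object):
    # One delayed callback; shape inferred from the tests above.
    def __init__(self, delay, callback_f, *args):
        self.fire_time = time.time() + delay
        self.callback_fs = [(callback_f, args)]
        self.cancelled = False

    def cancel(self):
        self.cancelled = True

    def fire_callbacks(self):
        if not self.cancelled:
            for callback_f, args in self.callback_fs:
                callback_f(*args)


class TaskManager(object):
    # Holds pending tasks; consume_task returns the next due task, or None.
    def __init__(self):
        self.tasks = []

    def add(self, task):
        self.tasks.append(task)
        self.tasks.sort(key=lambda t: t.fire_time)

    def consume_task(self):
        if self.tasks and self.tasks[0].fire_time <= time.time():
            return self.tasks.pop(0)
        return None
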
Example #5
    def query(cls, feed_ids, query, order, offset, limit, strip=False):
        cls.create_elasticsearch_mapping()
        cls.ES.indices.refresh()
        
        if strip:
            query    = re.sub(r'([^\s\w_\-])+', ' ', query) # Strip non-alphanumeric
        sort     = "date:desc" if order == "newest" else "date:asc"
        string_q = pyes.query.QueryStringQuery(query, default_operator="AND")
        feed_q   = pyes.query.TermsQuery('feed_id', feed_ids[:1000])
        q        = pyes.query.BoolQuery(must=[string_q, feed_q])
        try:
            results  = cls.ES.search(q, indices=cls.index_name(), doc_types=[cls.type_name()],
                                     partial_fields={}, sort=sort, start=offset, size=limit)
        except pyes.exceptions.NoServerAvailable:
            logging.debug(" ***> ~FRNo search server available.")
            return []

        logging.info(" ---> ~FG~SNSearch ~FCstories~FG for: ~SB%s~SN (across %s feed%s)" % 
                     (query, len(feed_ids), 's' if len(feed_ids) != 1 else ''))
        
        try:
            result_ids = [r.get_id() for r in results]
        except pyes.InvalidQuery, e:
            logging.info(" ---> ~FRInvalid search query \"%s\": %s" % (query, e))
            return []
Example #6
 def _test_error(self):
     outgoing_error_msg = OutgoingErrorMsg(tc.TID, GENERIC_E)
     data = outgoing_error_msg.encode()
     tid, msg_type, msg_dict = decode(data)
     incoming_error_msg = IncomingErrorMsg(msg_dict)
     log.debug(incoming_error_msg.error)
     assert incoming_error_msg.error == GENERIC_E
Example #7
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(
            feed=feed, active=True, user__profile__last_seen_on__gte=UNREAD_CUTOFF
        ).order_by("-last_read_date")

        for sub in user_subs:
            if not sub.needs_unread_recalc:
                sub.needs_unread_recalc = True
                sub.save()

        if self.options["compute_scores"]:
            stories_db = MStory.objects(story_feed_id=feed.pk, story_date__gte=UNREAD_CUTOFF)
            logging.debug(
                u"   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)"
                % (
                    feed.title[:30],
                    stories_db.count(),
                    user_subs.count(),
                    feed.num_subscribers,
                    feed.active_subscribers,
                    feed.premium_subscribers,
                )
            )
            self.calculate_feed_scores_with_stories(user_subs, stories_db)
        elif self.options.get("mongodb_replication_lag"):
            logging.debug(
                u"   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag"
                % (feed.title[:30], self.options.get("mongodb_replication_lag"))
            )
Example #8
 def on_response_received(self, response_msg, addr):
     # TYPE and TID already sanitized by rpc_manager
     log.debug('response received: %s' % repr(response_msg))
     try:
         addr_query_list = self.pending[addr]
     except KeyError:
         log.warning('No pending queries for %s', addr)
         return # Ignore response
     # There are pending queries from this node; find the matching one by TID
     query_found = False
     for query_index, query in enumerate(addr_query_list):
         log.debug('response node: %s, query:\n(%s, %s)' % (
             `addr`,
             `query.tid`,
             `query.query`))
         if query.matching_tid(response_msg.tid):
             query_found = True
             break
     if not query_found:
         log.warning('No query for this response\n%s\nsource: %s' % (
             response_msg, addr))
         return # ignore response 
     # This response matches query. Trigger query's callback
     response_is_ok = query.on_response_received(response_msg)
     if response_is_ok:
         # Remove this query from pending
         if len(addr_query_list) == 1:
             # There is one item in the list. Remove the whole list.
             del self.pending[addr]
         else:
             del addr_query_list[query_index]
     else:
         log.warning('Bad response from %r\n%r' % (addr,
                                                       response_msg))
Example #9
def main():
    lang = 'zh'
    if len(sys.argv) == 2:
        lang = sys.argv[1]

    cd = sys.path[0]
    translation_path = os.path.join(cd, '../translation')

    # load lua
    pregame_file = os.path.join(translation_path, 'en_pregame.lua')
    client_file = os.path.join(translation_path, 'en_client.lua')

    ui_mgr = UiMgr()
    log.debug('loading lua file %s' % pregame_file)
    ui_mgr.load_lua_file(pregame_file)
    log.debug('loading lua file %s' % client_file)
    ui_mgr.load_lua_file(client_file)
    log.info('read %d lines.' % len(ui_mgr.ui_lines))

    # save merged lines
    translate_file = os.path.join(translation_path, '%s_translate.txt' % lang)
    if os.path.exists(translate_file):
        choose = input('%s_translate.txt file exists, merge? [y/N]' % lang)
        choose = choose.lower().strip()
        if choose != '' and choose[0] == 'y':
            log.info('merging to translate file.')
            ui_mgr.apply_translate_from_txt_file(translate_file)
        else:
            log.info('skipped.')
            return

    with open(translate_file, 'wt', encoding='utf-8') as fp:
        fp.writelines(ui_mgr.get_txt_lines(replace=True))
        log.info('translate file saved.')
Example #10
        def _2(*args, **kw):
            class Dispatch(threading.Thread):
                def __init__(self):
                    threading.Thread.__init__(self)
                    self.result = None
                    self.error = None
                    
                    self.setDaemon(True)
                    self.start()

                def run(self):
                    try:
                        self.result = function(*args, **kw)
                    except:
                        self.error = sys.exc_info()
            c = Dispatch()
            c.join(timeout)
            if c.isAlive():
                raise TimeoutError, 'took too long'
            if c.error:
                tb = ''.join(traceback.format_exception(c.error[0], c.error[1], c.error[2]))
                logging.debug(tb)
                mail_admins('Error in timeout: %s' % c.error[0], tb)
                raise c.error[0], c.error[1], c.error[2]
            return c.result
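
(_2 above is the innermost function of a timeout decorator: it runs the wrapped call in a daemon thread, joins with a timeout, and re-raises any captured error. A minimal sketch of such a decorator factory follows; the name timelimit is an assumption, the built-in TimeoutError stands in for whatever exception the project defines, and the mail_admins/traceback reporting from the example is omitted.)

import sys
import threading


def timelimit(timeout):
    # Decorator factory: calls taking longer than `timeout` seconds raise TimeoutError.
    def _1(function):
        def _2(*args, **kw):
            class Dispatch(threading.Thread):
                def __init__(self):
                    threading.Thread.__init__(self)
                    self.result = None
                    self.error = None
                    self.daemon = True
                    self.start()

                def run(self):
                    try:
                        self.result = function(*args, **kw)
                    except Exception:
                        self.error = sys.exc_info()

            c = Dispatch()
            c.join(timeout)
            if c.is_alive():
                raise TimeoutError('took too long')
            if c.error:
                raise c.error[1]  # re-raise the captured exception
            return c.result
        return _2
    return _1

# Usage: a function decorated with @timelimit(2) raises TimeoutError after two seconds.
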
Example #11
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(feed=feed, 
                                                    active=True,
                                                    user__profile__last_seen_on__gte=UNREAD_CUTOFF)\
                                            .order_by('-last_read_date')
        
        if not user_subs.count():
            return
            
        for sub in user_subs:
            if not sub.needs_unread_recalc:
                sub.needs_unread_recalc = True
                sub.save()

        if self.options['compute_scores']:
            stories = MStory.objects(story_feed_id=feed.pk,
                                     story_date__gte=UNREAD_CUTOFF)\
                            .read_preference(pymongo.ReadPreference.PRIMARY)
            stories = Feed.format_stories(stories, feed.pk)
            logging.debug(u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % (
                          feed.title[:30], len(stories), user_subs.count(),
                          feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))        
            self.calculate_feed_scores_with_stories(user_subs, stories)
        elif self.options.get('mongodb_replication_lag'):
            logging.debug(u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % (
              feed.title[:30], self.options.get('mongodb_replication_lag')))
Example #12
    def add_missing_feeds(self):
        all_feeds = self.flat()
        subs = [us.feed_id for us in
                UserSubscription.objects.filter(user=self.user).only('feed')]
        
        missing_subs = set(all_feeds) - set(subs)
        if missing_subs:
            logging.debug(" ---> %s is missing %s subs. Adding %s..." % (
                          self.user, len(missing_subs), missing_subs))
            for feed_id in missing_subs:
                feed = Feed.get_by_id(feed_id)
                if feed:
                    us, _ = UserSubscription.objects.get_or_create(user=self.user, feed=feed, defaults={
                        'needs_unread_recalc': True
                    })
                    if not us.needs_unread_recalc:
                        us.needs_unread_recalc = True
                        us.save()

        missing_folder_feeds = set(subs) - set(all_feeds)
        if missing_folder_feeds:
            user_sub_folders = json.decode(self.folders)
            logging.debug(" ---> %s is missing %s folder feeds. Adding %s..." % (
                          self.user, len(missing_folder_feeds), missing_folder_feeds))
            for feed_id in missing_folder_feeds:
                feed = Feed.get_by_id(feed_id)
                if feed and feed.pk == feed_id:
                    user_sub_folders = add_object_to_folder(feed_id, "", user_sub_folders)
            self.folders = json.encode(user_sub_folders)
            self.save()
Example #13
 def fetch_image_from_page_data(self):
     image = None
     image_file = None
     if self.page_data:
         content = self.page_data
     elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
         key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
         compressed_content = key.get_contents_as_string()
         stream = StringIO(compressed_content)
         gz = gzip.GzipFile(fileobj=stream)
         try:
             content = gz.read()
         except IOError:
             content = None
     else:
         content = MFeedPage.get_data(feed_id=self.feed.pk)
     url = self._url_from_html(content)
     if not url:
         try:
             content = requests.get(self.feed.feed_link).content
             url = self._url_from_html(content)
         except (AttributeError, SocketError, requests.ConnectionError,
                 requests.models.MissingSchema, requests.sessions.InvalidSchema,
                 requests.sessions.TooManyRedirects,
                 requests.models.InvalidURL,
                 requests.models.ChunkedEncodingError,
                 requests.models.ContentDecodingError,
                 LocationParseError, OpenSSLError, PyAsn1Error), e:
             logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
Example #14
 def process_response(self, request, response):
     if not self.activated(request):
         return response
     if connection.queries:
         time_elapsed = sum([float(q["time"]) for q in connection.queries])
         queries = connection.queries
         for query in queries:
             if query.get("mongo"):
                 query["sql"] = "~FM%s: %s" % (query["mongo"]["collection"], query["mongo"]["query"])
             elif query.get("redis"):
                 query["sql"] = "~FC%s" % (query["redis"]["query"])
             else:
                 query["sql"] = re.sub(r"SELECT (.*?) FROM", "SELECT * FROM", query["sql"])
                 query["sql"] = re.sub(r"SELECT", "~FYSELECT", query["sql"])
                 query["sql"] = re.sub(r"INSERT", "~FGINSERT", query["sql"])
                 query["sql"] = re.sub(r"UPDATE", "~FY~SBUPDATE", query["sql"])
                 query["sql"] = re.sub(r"DELETE", "~FR~SBDELETE", query["sql"])
         t = Template(
             "{% for sql in sqllog %}{% if not forloop.first %}                  {% endif %}[{{forloop.counter}}] ~FC{{sql.time}}s~FW: {{sql.sql|safe}}{% if not forloop.last %}\n{% endif %}{% endfor %}"
         )
         if settings.DEBUG:
             logging.debug(t.render(Context({"sqllog": queries, "count": len(queries), "time": time_elapsed})))
         times_elapsed = {
             "sql": sum([float(q["time"]) for q in queries if not q.get("mongo") and not q.get("redis")]),
             "mongo": sum([float(q["time"]) for q in queries if q.get("mongo")]),
             "redis": sum([float(q["time"]) for q in queries if q.get("redis")]),
         }
         setattr(request, "sql_times_elapsed", times_elapsed)
     return response
Example #15
    def save_page(self, html):
        if html and len(html) > 100:
            if settings.BACKED_BY_AWS.get('pages_on_s3'):
                k = Key(settings.S3_PAGES_BUCKET)
                k.key = self.feed.s3_pages_key
                k.set_metadata('Content-Encoding', 'gzip')
                k.set_metadata('Content-Type', 'text/html')
                k.set_metadata('Access-Control-Allow-Origin', '*')
                out = StringIO.StringIO()
                f = gzip.GzipFile(fileobj=out, mode='w')
                f.write(html)
                f.close()
                compressed_html = out.getvalue()
                k.set_contents_from_string(compressed_html)
                k.set_acl('public-read')
                
                try:
                    feed_page = MFeedPage.objects.get(feed_id=self.feed.pk)
                    feed_page.delete()
                    logging.debug('   --->> [%-30s] ~FYTransferring page data to S3...' % (self.feed))
                except MFeedPage.DoesNotExist:
                    pass

                self.feed.s3_page = True
                self.feed.save()
            else:
                try:
                    feed_page = MFeedPage.objects.get(feed_id=self.feed.pk)
                    feed_page.page_data = html
                    feed_page.save()
                except MFeedPage.DoesNotExist:
                    feed_page = MFeedPage.objects.create(feed_id=self.feed.pk, page_data=html)
                return feed_page
Example #16
    def check_urls_against_pushed_data(self, parsed):
        if hasattr(parsed.feed, 'links'): # single notification
            hub_url = self.hub
            self_url = self.topic
            for link in parsed.feed.links:
                href = link.get('href', '')
                if any(w in href for w in ['wp-admin', 'wp-cron']):
                    continue
                    
                if link['rel'] == 'hub':
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']

            needs_update = False
            if hub_url and self.hub != hub_url:
                # hub URL has changed; let's update our subscription
                needs_update = True
            elif self_url != self.topic:
                # topic URL has changed
                needs_update = True

            if needs_update:
                logging.debug(u'   ---> [%-30s] ~FR~BKUpdating PuSH hub/topic: %s / %s' % (
                              unicode(self.feed)[:30], hub_url, self_url))
                expiration_time = self.lease_expires - datetime.now()
                seconds = expiration_time.days*86400 + expiration_time.seconds
                PushSubscription.objects.subscribe(
                    self_url, feed=self.feed, hub=hub_url,
                    lease_seconds=seconds)
Example #17
 def fetch(self):
     """ 
     Uses feedparser to download the feed. Will be parsed later.
     """
     identity = self.get_identity()
     log_msg = u'%2s ---> [%-30s] Fetching feed (%d)' % (identity,
                                                         unicode(self.feed)[:30],
                                                         self.feed.id)
     logging.debug(log_msg)
                                              
     self.feed.set_next_scheduled_update()
     etag=self.feed.etag
     modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
     
     if self.options.get('force') or not self.feed.fetched_once:
         modified = None
         etag = None
         
     USER_AGENT = 'NewsBlur Feed Fetcher (%s subscriber%s) - %s' % (
         self.feed.num_subscribers,
         's' if self.feed.num_subscribers != 1 else '',
         URL
     )
     self.fpf = feedparser.parse(self.feed.feed_address,
                                 agent=USER_AGENT,
                                 etag=etag,
                                 modified=modified)
     
     return FEED_OK, self.fpf
Example #18
def do_backup(schedule, follow_links):
    '''Handles the backup.'''
    
    from shutil import rmtree
    import utils.filesystem

    if schedule == 'daily':
        backup_list = config.daily_backup_list
    elif schedule == 'weekly':
        backup_list = config.weekly_backup_list
    else:
        backup_list = config.monthly_backup_list

    try:
        files = utils.filesystem.read_file_list(backup_list)
        archive_path, tar_type = create_archive(files, follow_links)
        if config.enc_backup == True:
            # We don't add the enc extension to the key - the metadata
            # will tell us whether the archive is encrypted.
            enc_file = utils.encrypt.encrypt_file(config.enc_key,
                    archive_path, config.enc_piece_size)
            send_backup(enc_file, tar_type, schedule)
            # Delete the plaintext local version
            os.remove(archive_path)
        else: # Not encrypting
            send_backup(archive_path, tar_type, schedule)

        if config.delete_archive_when_finished == True:
            log.debug('Deleting archive.')
            rmtree(config.dest_location)
    except IOError:
        log.critical('Cannot open file: %s' % backup_list)
        sys.exit(1)
Example #19
 def fetch(self):
     """ Downloads and parses a feed.
     """
     socket.setdefaulttimeout(30)
     identity = self.get_identity()
     log_msg = u'%2s ---> [%-30s] Fetching feed (%d)' % (identity,
                                              unicode(self.feed)[:30],
                                              self.feed.id)
     logging.debug(log_msg)
                                              
     # Check if feed still needs to be updated
     # feed = Feed.objects.get(pk=self.feed.pk)
     # if feed.next_scheduled_update > datetime.datetime.now() and not self.options.get('force'):
     #     log_msg = u'        ---> Already fetched %s (%d)' % (self.feed.feed_title,
     #                                                          self.feed.id)
     #     logging.debug(log_msg)
     #     feed.save_feed_history(303, "Already fetched")
     #     return FEED_SAME, None
     # else:
     self.feed.set_next_scheduled_update()
         
     etag=self.feed.etag
     modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
     
     if self.options.get('force'):
         modified = None
         etag = None
         
     self.fpf = feedparser.parse(self.feed.feed_address,
                                 agent=USER_AGENT,
                                 etag=etag,
                                 modified=modified)
     
     return FEED_OK, self.fpf
Example #20
 def get(self, *args, **kwargs):
     try:
         return super(UserSubscriptionManager, self).get(*args, **kwargs)
     except self.model.DoesNotExist:
         if isinstance(kwargs.get('feed'), int):
             feed_id = kwargs.get('feed')
         elif 'feed' in kwargs:
             feed_id = kwargs['feed'].pk
         elif 'feed__pk' in kwargs:
             feed_id = kwargs['feed__pk']
         elif 'feed_id' in kwargs:
             feed_id = kwargs['feed_id']
         dupe_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
         if dupe_feed:
             feed = dupe_feed[0].feed
             if 'feed' in kwargs: 
                 kwargs['feed'] = feed
             elif 'feed__pk' in kwargs:
                 kwargs['feed__pk'] = feed.pk
             elif 'feed_id' in kwargs:
                 kwargs['feed_id'] = feed.pk
             user = kwargs.get('user')
             if isinstance(user, int):
                 user = User.objects.get(pk=user)
             logging.debug(" ---> [%s] ~BRFound dupe UserSubscription: ~SB%s (%s)" % (user and user.username, feed, feed_id))
             return super(UserSubscriptionManager, self).get(*args, **kwargs)
         else:
             exc_info = sys.exc_info()
             raise exc_info[0], None, exc_info[2]
Example #21
def create_zip(archive, files):
    '''Creates a zip file containing the files being backed up.'''
    import zipfile
    from utils.misc import add_file_hash

    try:
        # zipfile always follows links
        with zipfile.ZipFile(archive, 'w') as zipf:
            zipf.comment = 'Created by s3-backup'
            for f in files:
                f = f.strip()
                if os.path.exists(f):
                    zipf.write(f)
                    add_file_hash(archive, f)
                    log.debug('Added %s.' % f)
                else:
                    log.error('%s does not exist.' % f)
                
                if zipf.testzip() is not None:
                    log.error('An error occurred creating the zip archive.')
    except zipfile.BadZipfile:
        # I assume this only happens on reads? Just in case...
        log.critical('The zip file is corrupt.')
    except zipfile.LargeZipFile:
        log.critical('The zip file is greater than 2 GB.'
                ' Enable zip64 functionality.')
Example #22
 def save_page(self, html):
     saved = False
     
     if not html or len(html) < 100:
         return
     
     if settings.BACKED_BY_AWS.get('pages_on_node'):
         saved = self.save_page_node(html)
         if saved and self.feed.s3_page and settings.BACKED_BY_AWS.get('pages_on_s3'):
             self.delete_page_s3()
         
     if settings.BACKED_BY_AWS.get('pages_on_s3') and not saved:
         saved = self.save_page_s3(html)
         
     if not saved:
         try:
             feed_page = MFeedPage.objects.get(feed_id=self.feed.pk)
             # feed_page.page_data = html.encode('utf-8')
             if feed_page.page() == html:
                 logging.debug('   ---> [%-30s] ~FYNo change in page data: %s' % (self.feed.log_title[:30], self.feed.feed_link))
             else:
                 feed_page.page_data = html
                 feed_page.save()
         except MFeedPage.DoesNotExist:
             feed_page = MFeedPage.objects.create(feed_id=self.feed.pk, page_data=html)
         return feed_page
Example #23
    def fetch(self):
        """ 
        Uses feedparser to download the feed. Will be parsed later.
        """
        identity = self.get_identity()
        log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (identity,
                                                            unicode(self.feed)[:30],
                                                            self.feed.id,
                                                            datetime.datetime.now() - self.feed.last_update)
        logging.debug(log_msg)
                                                 
        self.feed.set_next_scheduled_update()
        etag=self.feed.etag
        modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
        
        if self.options.get('force') or not self.feed.fetched_once:
            modified = None
            etag = None
            
        USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3 (NewsBlur Feed Fetcher - %s subscriber%s - %s)' % (
            self.feed.num_subscribers,
            's' if self.feed.num_subscribers != 1 else '',
            settings.NEWSBLUR_URL
        )

        self.fpf = feedparser.parse(self.feed.feed_address,
                                    agent=USER_AGENT,
                                    etag=etag,
                                    modified=modified)
        
        return FEED_OK, self.fpf
Example #24
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(
            feed=feed, active=True, user__profile__last_seen_on__gte=UNREAD_CUTOFF
        ).order_by("-last_read_date")
        logging.debug(
            u"   ---> [%-30s] Computing scores: %s (%s/%s/%s) subscribers"
            % (
                unicode(feed)[:30],
                user_subs.count(),
                feed.num_subscribers,
                feed.active_subscribers,
                feed.premium_subscribers,
            )
        )

        stories_db = MStory.objects(story_feed_id=feed.pk, story_date__gte=UNREAD_CUTOFF)
        for sub in user_subs:
            cache.delete("usersub:%s" % sub.user_id)
            sub.needs_unread_recalc = True
            sub.save()

        if self.options["compute_scores"]:
            for sub in user_subs:
                silent = False if self.options["verbose"] >= 2 else True
                sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
Example #25
def mark_story_as_read(request):
    story_ids = request.REQUEST.getlist('story_id')
    feed_id = int(request.REQUEST['feed_id'])
    
    usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=story_ids)
    
    if len(story_ids) > 1:
        logging.debug(" ---> [%s] Read %s stories in feed: %s" % (request.user, len(story_ids), usersub.feed))
    else:
        logging.debug(" ---> [%s] Read story in feed: %s" % (request.user, usersub.feed))
        
    for story_id in story_ids:
        story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
        now = datetime.datetime.utcnow()
        m = MUserStory(story=story, user_id=request.user.pk, feed_id=feed_id, read_date=now)
        try:
            m.save()
        except OperationError:
            logging.info(' ---> [%s] *** Marked story as read: Duplicate Story -> %s' % (request.user, story_id))
    
    return data
Example #26
 def process_response(self, request, response): 
     if not self.activated(request): return response
     if connection.queries:
         time_elapsed = sum([float(q['time']) for q in connection.queries])
         queries = connection.queries
         for query in queries:
             if query.get('mongo'):
                 query['sql'] = "~FM%s: %s" % (query['mongo']['collection'], query['mongo']['query'])
             elif query.get('redis'):
                 query['sql'] = "~FC%s" % (query['redis']['query'])
             else:
                 query['sql'] = re.sub(r'SELECT (.*?) FROM', 'SELECT * FROM', query['sql'])
                 query['sql'] = re.sub(r'SELECT', '~FYSELECT', query['sql'])
                 query['sql'] = re.sub(r'INSERT', '~FGINSERT', query['sql'])
                 query['sql'] = re.sub(r'UPDATE', '~FY~SBUPDATE', query['sql'])
                 query['sql'] = re.sub(r'DELETE', '~FR~SBDELETE', query['sql'])
         t = Template("{% for sql in sqllog %}{% if not forloop.first %}                  {% endif %}[{{forloop.counter}}] ~FC{{sql.time}}s~FW: {{sql.sql|safe}}{% if not forloop.last %}\n{% endif %}{% endfor %}")
         if settings.DEBUG:
             logging.debug(t.render(Context({
                 'sqllog': queries,
                 'count': len(queries),
                 'time': time_elapsed,
             })))
         times_elapsed = {
             'sql': sum([float(q['time']) 
                        for q in queries if not q.get('mongo') and 
                                            not q.get('redis')]),
             'mongo': sum([float(q['time']) for q in queries if q.get('mongo')]),
             'redis': sum([float(q['time']) for q in queries if q.get('redis')]),
         }
         setattr(request, 'sql_times_elapsed', times_elapsed)
     return response
Example #27
    def query(cls, text):
        try:
            cls.ES.default_indices = cls.index_name()
            cls.ES.indices.refresh()
        except pyes.exceptions.NoServerAvailable:
            logging.debug(" ***> ~FRNo search server available.")
            return []
        
        logging.info("~FGSearch ~FCfeeds~FG by address: ~SB%s" % text)
        q = MatchQuery('address', text, operator="and", type="phrase")
        results = cls.ES.search(query=q, sort="num_subscribers:desc", size=5,
                                doc_types=[cls.type_name()])

        if not results.total:
            logging.info("~FGSearch ~FCfeeds~FG by title: ~SB%s" % text)
            q = MatchQuery('title', text, operator="and")
            results = cls.ES.search(query=q, sort="num_subscribers:desc", size=5,
                                    doc_types=[cls.type_name()])
            
        if not results.total:
            logging.info("~FGSearch ~FCfeeds~FG by link: ~SB%s" % text)
            q = MatchQuery('link', text, operator="and")
            results = cls.ES.search(query=q, sort="num_subscribers:desc", size=5,
                                    doc_types=[cls.type_name()])
            
        return results
Example #28
    def add_file_by_id(self, translation_path, file_id_str):
        """添加一个文件

        Args:
            translation_path (str): 翻译文件的路径
            file_id_str (str): 文件 id
        """
        lang_groups = {}
        # English
        file_path = os.path.join(translation_path, 'en.%s.lang.csv' % file_id_str)
        if os.path.isfile(file_path):
            for line in load_lang_csv(file_path, skip_header=False):
                file_id, unknown, index, offset = [int(v) for v in line[0:4]]
                origin = line[4]
                if index not in lang_groups.keys():
                    # a newly seen index: create a new group
                    lang_groups[index] = LangGroup(index)
                lang_groups[index].add(file_id, unknown, index, offset, origin)
        # Japanese
        file_path_jp = os.path.join(translation_path, 'jp.%s.lang.csv' % file_id_str)
        if os.path.isfile(file_path_jp):
            for line in load_lang_csv(file_path_jp, skip_header=False):
                file_id, unknown, index, offset = [int(v) for v in line[0:4]]
                origin_jp = line[4]
                if index not in lang_groups.keys():
                    # an index not present in the English file: discard it
                    log.debug('new index from jp: %s' % str(line[0:4]))
                    continue
                lang_groups[index].add_jp(file_id, unknown, index, offset, origin_jp)
        # add
        self.all_lang_groups[file_id_str] = lang_groups
Example #29
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(feed=feed, 
                                                    active=True,
                                                    user__profile__last_seen_on__gte=UNREAD_CUTOFF)\
                                            .order_by('-last_read_date')
        logging.debug(u'   ---> [%-30s] Computing scores: %s (%s/%s/%s) subscribers' % (
                      unicode(feed)[:30], user_subs.count(),
                      feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))
        
        if self.options['slave_db']:
            slave_db = self.options['slave_db']

            stories_db_orig = slave_db.stories.find({
                "story_feed_id": feed.pk,
                "story_date": {
                    "$gte": UNREAD_CUTOFF,
                },
            })
            stories_db = []
            for story in stories_db_orig:
                stories_db.append(bunch(story))
        else:
            stories_db = MStory.objects(story_feed_id=feed.pk,
                                        story_date__gte=UNREAD_CUTOFF)
        for sub in user_subs:
            cache.delete('usersub:%s' % sub.user_id)
            sub.needs_unread_recalc = True
            sub.save()
            
        if self.options['compute_scores']:
            for sub in user_subs:
                silent = False if self.options['verbose'] >= 2 else True
                sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
Example #30
 def collect_files(self, task_id=None):
     t1 = time.clock()
     self.files(self.path)
     self.result['no_extension'] = {'file_count': 0, 'file_list': []}
     for extension, values in self.type_nums.iteritems():
         extension = extension.strip()
         self.result[extension] = {'file_count': len(values), 'file_list': []}
         # .php : 123
         log.debug('{0} : {1}'.format(extension, len(values)))
         if task_id is not None:
             # Store
             ext = CobraExt(task_id, extension, len(values))
             db.session.add(ext)
         for f in self.file:
             es = f.split(os.extsep)
             if len(es) >= 2:
                 # file has an extension
                 # os.extsep + es[len(es) - 1]
                 if f.endswith(extension):
                     self.result[extension]['file_list'].append(f)
             else:
                 # no extension
                 self.result['no_extension']['file_count'] = int(self.result['no_extension']['file_count']) + 1
                 self.result['no_extension']['file_list'].append(f)
     if task_id is not None:
         db.session.commit()
     t2 = time.clock()
     self.result['file_nums'] = self.file_id
     self.result['collect_time'] = t2 - t1
     return self.result
Example #31
                    '   ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' %
                    (unicode(feed_id)[:30], e.fp.read()))
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] ~FRFeed is now gone...' %
                              (unicode(feed_id)[:30]))
                continue
            except TimeoutError, e:
                logging.debug('   ---> [%-30s] ~FRFeed fetch timed out...' %
                              (feed.title[:30]))
                feed.save_feed_history(505, 'Timeout', e)
                feed_code = 505
                fetched_feed = None
            except Exception, e:
                logging.debug('[%d] ! -------------------------' % (feed_id, ))
                tb = traceback.format_exc()
                logging.error(tb)
                logging.debug('[%d] ! -------------------------' % (feed_id, ))
                ret_feed = FEED_ERREXC
                feed = Feed.get_by_id(getattr(feed, 'pk', feed_id))
                if not feed: continue
                feed.save_feed_history(500, "Error", tb)
                feed_code = 500
                fetched_feed = None
                # mail_feed_error_to_admin(feed, e, local_vars=locals())
                if (not settings.DEBUG and hasattr(settings, 'RAVEN_CLIENT')
                        and settings.RAVEN_CLIENT):
                    settings.RAVEN_CLIENT.captureException()

            if not feed_code:
Example #32
 def __init__(self, client):
     super().__init__()
     log.debug('init recv thread...')
     self.client = client
Example #33
    def run(self):

        local_ip, local_port = get_ip_address()
        log.info(f'local IP Address: {local_ip}, {local_port}')

        message = local_ip + SEPERATOR \
                    + str(local_port) + SEPERATOR \
                    + self.client.serial

        TIMEOUT_S = 60
        PING_INTERVAL_S = 30
        LOOP_INTERVAL_S = 5
        conn_flag = False

        server_peer = Peer(self.client.server_ip, self.client.server_port)
        while not self.is_interrupted():
            current_time = get_current_time_sec()
            with self.client.mutex_for_kcp_peer_map:
                for kcp_peer in list(self.kcp_peer_map.values()):
                    if kcp_peer.last_ping + TIMEOUT_S < current_time:
                        if kcp_peer.peer == server_peer:
                            continue

                        log.info(f'removed kcp_peer: {kcp_peer.peer.key}')
                        del self.kcp_peer_map[kcp_peer.peer.key]

                    elif kcp_peer.last_ping + PING_INTERVAL_S < current_time:
                        message_wrapper = MessageWrapper(
                            registered_types=self.client.registered_types,
                            message=None,
                            message_type=MessageType.RAWBYTE,
                            packet_type=RM.PING_REQUEST,
                            connection_id=0)

                        kcp_peer.send(message_wrapper)

            with self.client.mutex_for_rendvs_sess_map:
                for key, rendvs_sess in list(self.rendvs_sess_map.items()):
                    if rendvs_sess is None:
                        continue

                    if rendvs_sess.relay_kcp_peer is not None:
                        last_ping = rendvs_sess.relay_kcp_peer.last_ping
                        if last_ping + TIMEOUT_S < current_time:
                            log.warning(f'relay peer removed, {last_ping}')
                            rendvs_sess.relay_kcp_peer = None

                    if rendvs_sess.public_kcp_peer is not None:
                        last_ping = rendvs_sess.public_kcp_peer.last_ping
                        if last_ping + TIMEOUT_S < current_time:
                            log.warning(f'public peer removed, {last_ping}')
                            rendvs_sess.public_kcp_peer = None

                    if rendvs_sess.private_kcp_peer is not None:
                        last_ping = rendvs_sess.private_kcp_peer.last_ping
                        if last_ping + TIMEOUT_S < current_time:
                            log.warning(f'private peer removed, {last_ping}')
                            rendvs_sess.private_kcp_peer = None

                    if not rendvs_sess.is_connected():
                        del self.rendvs_sess_map[key]

                        if self.client.on_disconnected is not None:
                            self.client.on_disconnected(rendvs_sess)

                        log.info(f'Disconnected, connectionID='
                                 f'{rendvs_sess.connection_id}')

            if not self.client.is_connected:
                if conn_flag:
                    with self.client.mutex_for_kcp_peer_map:
                        del self.client.kcp_peer_map[server_peer.key]
                    self.client.sock = create_udp_socket()
                    self.client.on_server_connect_failed()

                else:
                    conn_flag = True

                self.client.on_server_connecting()

                message_wrapper = MessageWrapper(
                    registered_types=self.client.registered_types,
                    message=message,
                    message_type=MessageType.RAWBYTE,
                    packet_type=RM.REGISTRATION_RENDEZVOUS_CLIENT_REQUEST,
                    connection_id=0)

                self.client.get_kcp_peer(server_peer).send(message_wrapper)

            elif self.client.get_kcp_peer(server_peer).last_ping \
                    + TIMEOUT_S < current_time:
                
                with self.client.mutex_for_kcp_peer_map:
                    del self.client.kcp_peer_map[server_peer.key]

                conn_flag = False
                self.client.is_connected = False
                self.client.sock = create_udp_socket()
                self.client.on_server_disconnected()

            else:
                message_wrapper = MessageWrapper(
                    registered_types=self.client.registered_types,
                    message=message,
                    message_type=MessageType.RAWBYTE,
                    packet_type=RM.REGISTRATION_RENDEZVOUS_CLIENT_REQUEST,
                    connection_id=0)

                self.client.get_kcp_peer(server_peer).send(message_wrapper)

            time.sleep(LOOP_INTERVAL_S)

        log.debug('registerThread finished')

        return 
Example #34
 async def post(self, *args, **kwargs):
     """ create proxies
     """
     datas = self.get_body()
     logger.debug('datas:', datas, caller=self)
     self.do_success({'ok': 1}, 'todo')
Example #35
def check_string(text_to_check):
    """检查是否符合规范,输出错误

    Args:
        text_to_check (str): 一个待检查的字符串

    Returns:
        return (bool): 是否合格
    """
    if text_to_check == '':
        return True

    stack = {'<>': 0, 'c': 0, 't': 0}
    i = 0
    len_text = len(text_to_check)

    while i < len_text:
        curr_char = text_to_check[i]
        # matching of <<>>
        if curr_char == '<':
            if i + 1 < len_text and text_to_check[i + 1] == '<':
                # it may just be a lone < character
                stack['<>'] += 1
                i += 1
        elif curr_char == '>':
            if i + 1 < len_text and text_to_check[i + 1] == '>':
                stack['<>'] -= 1
                i += 1
        elif curr_char == '|':
            # color start
            if text_to_check[i + 1] == 'c':
                search_not_match = re.compile(r'[^0-9a-fA-F]').search
                # the six characters after |c must be hex color digits
                if search_not_match(text_to_check[i + 2:i + 2 + 6]):
                    log.debug('find error: color |c')
                    return False
                stack['c'] += 1
                i += 7
            # color end
            elif text_to_check[i + 1] == 'r':
                stack['c'] -= 1
                i += 1
            # call
            elif text_to_check[i + 1] == 't':
                if stack['t'] == 0:
                    stack['t'] += 1
                    i += 1
                else:
                    stack['t'] -= 1
                    i += 1
        elif curr_char == '\\':
            if text_to_check[i + 1] not in r'\n"':
                log.debug(r'find error: usage of \: is it \\ or \n?')
                return False
            i += 1
        # no check yet for the content between <<>>
        # count checks
        # the |c |r cases seem to be fairly flexible
        if stack['<>'] < 0 or stack['<>'] > 1 or stack['c'] < -1 or stack[
                'c'] > 1 or stack['t'] > 1:
            log.debug('find error: <<>>, |c|r not match')
            return False
        # iter
        i += 1

    # final matching check
    if stack['<>'] != 0 or stack['c'] < -1 or stack['c'] > 1 or stack['t'] != 0:
        log.debug('find error: <<>>, |c|r not match')
        return False
    return True
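
(A few illustrative calls, assuming check_string as defined above is in scope together with its re import and a configured log logger; the sample strings use the <<...>> and |c...|r markup conventions the function checks.)

assert check_string('') is True
assert check_string('<<player>> says hello') is True   # balanced <<...>>
assert check_string('|c00FF00green text|r') is True    # balanced color markup
assert check_string('|cZZ0000bad color|r') is False    # non-hex digits after |c
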
Example #36
 def get_driver(self):
     debug('webdriver -> get and return the current driver instance.')
     return self.driver
Example #37
def ReimportStripeHistory():
    logging.debug(" ---> Reimporting Stripe history...")
    Profile.reimport_stripe_history(limit=10, days=1)
Example #38
def CleanSpam():
    logging.debug(" ---> Finding spammers...")
    Profile.clear_dead_spammers(confirm=True)
Example #39
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        feed = None

        if current_process._identity:
            identity = current_process._identity[0]

        for feed_id in feed_queue:
            start_duration = time.time()
            feed_fetch_duration = None
            feed_process_duration = None
            page_duration = None
            icon_duration = None
            feed_code = None
            ret_entries = None
            start_time = time.time()
            ret_feed = FEED_ERREXC
            try:
                feed = self.refresh_feed(feed_id)

                skip = False
                if self.options.get('fake'):
                    skip = True
                    weight = "-"
                    quick = "-"
                    rand = "-"
                elif (self.options.get('quick') and not self.options['force']
                      and feed.known_good and feed.fetched_once
                      and not feed.is_push):
                    weight = feed.stories_last_month * feed.num_subscribers
                    random_weight = random.randint(1, max(weight, 1))
                    quick = float(self.options.get('quick', 0))
                    rand = random.random()
                    if random_weight < 100 and rand < quick:
                        skip = True
                if skip:
                    logging.debug(
                        '   ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...'
                        % (feed.title[:30], weight, feed.num_subscribers, rand,
                           quick))
                    continue

                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                feed_fetch_duration = time.time() - start_duration

                if ((fetched_feed and ret_feed == FEED_OK)
                        or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    feed = pfeed.feed
                    feed_process_duration = time.time() - start_duration

                    if (ret_entries
                            and ret_entries['new']) or self.options['force']:
                        start = time.time()
                        if not feed.known_good or not feed.fetched_once:
                            feed.known_good = True
                            feed.fetched_once = True
                            feed = feed.save()
                        if self.options['force'] or random.random() <= 0.02:
                            logging.debug(
                                '   ---> [%-30s] ~FBPerforming feed cleanup...'
                                % (feed.title[:30], ))
                            start_cleanup = time.time()
                            feed.sync_redis()
                            logging.debug(
                                '   ---> [%-30s] ~FBDone with feed cleanup. Took ~SB%.4s~SN sec.'
                                %
                                (feed.title[:30], time.time() - start_cleanup))
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug(
                                '   ---> [%-30s] Unread count took too long...'
                                % (feed.title[:30], ))
                        if self.options['verbose']:
                            logging.debug(
                                u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss'
                                % (feed.title[:30], time.time() - start))
            except urllib2.HTTPError, e:
                logging.debug(
                    '   ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' %
                    (unicode(feed_id)[:30], e.fp.read()))
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] ~FRFeed is now gone...' %
                              (unicode(feed_id)[:30]))
                continue
Example #40
class Dispatcher:
    def __init__(self, options, num_threads):
        self.options = options
        self.feed_stats = {
            FEED_OK: 0,
            FEED_SAME: 0,
            FEED_ERRPARSE: 0,
            FEED_ERRHTTP: 0,
            FEED_ERREXC: 0
        }
        self.feed_trans = {
            FEED_OK: 'ok',
            FEED_SAME: 'unchanged',
            FEED_ERRPARSE: 'cant_parse',
            FEED_ERRHTTP: 'http_error',
            FEED_ERREXC: 'exception'
        }
        self.feed_keys = sorted(self.feed_trans.keys())
        self.num_threads = num_threads
        self.time_start = datetime.datetime.utcnow()
        self.workers = []

    def refresh_feed(self, feed_id):
        """Update feed, since it may have changed"""
        return Feed.objects.using('default').get(pk=feed_id)

    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        feed = None

        if current_process._identity:
            identity = current_process._identity[0]

        for feed_id in feed_queue:
            start_duration = time.time()
            feed_fetch_duration = None
            feed_process_duration = None
            page_duration = None
            icon_duration = None
            feed_code = None
            ret_entries = None
            start_time = time.time()
            ret_feed = FEED_ERREXC
            try:
                feed = self.refresh_feed(feed_id)

                skip = False
                if self.options.get('fake'):
                    skip = True
                    weight = "-"
                    quick = "-"
                    rand = "-"
                elif (self.options.get('quick') and not self.options['force']
                      and feed.known_good and feed.fetched_once
                      and not feed.is_push):
                    weight = feed.stories_last_month * feed.num_subscribers
                    random_weight = random.randint(1, max(weight, 1))
                    quick = float(self.options.get('quick', 0))
                    rand = random.random()
                    if random_weight < 100 and rand < quick:
                        skip = True
                if skip:
                    logging.debug(
                        '   ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...'
                        % (feed.title[:30], weight, feed.num_subscribers, rand,
                           quick))
                    continue

                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                feed_fetch_duration = time.time() - start_duration

                if ((fetched_feed and ret_feed == FEED_OK)
                        or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    feed = pfeed.feed
                    feed_process_duration = time.time() - start_duration

                    if (ret_entries
                            and ret_entries['new']) or self.options['force']:
                        start = time.time()
                        if not feed.known_good or not feed.fetched_once:
                            feed.known_good = True
                            feed.fetched_once = True
                            feed = feed.save()
                        if self.options['force'] or random.random() <= 0.02:
                            logging.debug(
                                '   ---> [%-30s] ~FBPerforming feed cleanup...'
                                % (feed.title[:30], ))
                            start_cleanup = time.time()
                            feed.sync_redis()
                            logging.debug(
                                '   ---> [%-30s] ~FBDone with feed cleanup. Took ~SB%.4s~SN sec.'
                                %
                                (feed.title[:30], time.time() - start_cleanup))
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug(
                                '   ---> [%-30s] Unread count took too long...'
                                % (feed.title[:30], ))
                        if self.options['verbose']:
                            logging.debug(
                                u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss'
                                % (feed.title[:30], time.time() - start))
            except urllib2.HTTPError, e:
                logging.debug(
                    '   ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' %
                    (unicode(feed_id)[:30], e.fp.read()))
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] ~FRFeed is now gone...' %
                              (unicode(feed_id)[:30]))
                continue
            except TimeoutError, e:
                logging.debug('   ---> [%-30s] ~FRFeed fetch timed out...' %
                              (feed.title[:30]))
                feed.save_feed_history(505, 'Timeout', e)
                feed_code = 505
                fetched_feed = None
Example #41
0
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()

        ret_values = dict(new=0, updated=0, same=0, error=0)

        # logging.debug(u' ---> [%d] Processing %s' % (self.feed.id, self.feed.feed_title))

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                        % (self.feed.title[:30], self.fpf.bozo_exception,
                           len(self.fpf.entries)))

            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            # 302: Temporary redirect: ignore
            # 301: Permanent redirect: save it
            if self.fpf.status == 301:
                if not self.fpf.href.endswith('feedburner.com/atom.xml'):
                    self.feed.feed_address = self.fpf.href
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug(
                        "   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                        % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                    % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                            feedparser.NonXMLContentType):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(
                    self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values

        # the feed has changed (or it is the first time we parse it)
        # saving the etag and last_modified fields
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (though it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''

        try:
            self.feed.last_modified = mtime(self.fpf.modified)
        except:
            self.feed.last_modified = None
            pass

        self.fpf.entries = self.fpf.entries[:100]

        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        # Deleted by Xinyan Lu : No this table
        # tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        # if tagline:
        #     self.feed.data.feed_tagline = utf8encode(tagline)
        #     self.feed.data.save()
        if not self.feed.feed_link_locked:
            self.feed.feed_link = self.fpf.feed.get(
                'link') or self.fpf.feed.get('id') or self.feed.feed_link

        self.feed = self.feed.save()

        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_guids = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            stories.append(story)
            story_guids.append(story.get('guid'))

        existing_stories = dict((s.story_guid, s) for s in MStory.objects(
            # story_guid__in=story_guids,
            story_date__gte=start_date,
            story_feed_id=self.feed.pk).limit(
                max(int(len(story_guids) * 1.5), 10)))

        ret_values = self.feed.add_update_stories(
            stories, existing_stories, verbose=self.options['verbose'])

        if (hasattr(self.fpf, 'feed') and hasattr(self.fpf.feed, 'links')
                and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now(
                    )
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            if (hub_url and self_url and not settings.DEBUG
                    and self.feed.active_subscribers > 0
                    and (push_expired or not self.feed.is_push
                         or self.options.get('force'))):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' %
                    (self.feed.title[:30], "~SKRe-~SN" if push_expired else "",
                     hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url,
                                                       feed=self.feed,
                                                       hub=hub_url)
                except TimeoutError:
                    logging.debug(
                        u'   ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s'
                        % (self.feed.title[:30], hub_url))
            elif (self.feed.is_push
                  and (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' %
                    (self.feed.title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()

        logging.debug(
            u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s'
            % (self.feed.title[:30], '~FG~SB' if ret_values['new'] else '',
               ret_values['new'], '~FY~SB' if ret_values['updated'] else '',
               ret_values['updated'], '~SB' if ret_values['same'] else '',
               ret_values['same'], '~FR~SB' if ret_values['error'] else '',
               ret_values['error'], len(self.fpf.entries)))
        self.feed.update_all_statistics(full=bool(ret_values['new']),
                                        force=self.options['force'])
        if ret_values['new']:
            self.feed.trim_feed()
            self.feed.expire_redis()
        self.feed.save_feed_history(200, "OK")

        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' %
                          (self.feed.title[:30], time.time() - start))

        return FEED_OK, ret_values
Example #42
0
 def refresh_feed(self):
     self.feed = Feed.get_by_id(self.feed_id)
     if self.feed_id != self.feed.pk:
         logging.debug(" ***> Feed has changed: from %s to %s" %
                       (self.feed_id, self.feed.pk))
         self.feed_id = self.feed.pk
Example #43
0
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()

        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                        % (self.feed.log_title[:30], self.fpf.bozo_exception,
                           len(self.fpf.entries)))

            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            # 302 and 307: Temporary redirect: ignore
            # 301 and 308: Permanent redirect: save it (after 10 tries)
            if self.fpf.status == 301 or self.fpf.status == 308:
                if self.fpf.href.endswith('feedburner.com/atom.xml'):
                    return FEED_ERRHTTP, ret_values
                redirects, non_redirects = self.feed.count_redirects_in_history(
                    'feed')
                self.feed.save_feed_history(
                    self.fpf.status,
                    "HTTP Redirect (%d to go)" % (10 - len(redirects)))
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    address = self.fpf.href
                    if self.options['force'] and address:
                        address = qurl(address, remove=['_'])
                    self.feed.feed_address = address
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug(
                        "   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                        % (self.feed.log_title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                    % (self.feed.log_title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf:
            logging.debug(
                "   ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!"
                % (self.feed.log_title[:30]))
            self.feed.save_feed_history(551, "Broken feed")
            return FEED_ERRHTTP, ret_values

        if self.fpf and not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                            feedparser.NonXMLContentType):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                    % (self.feed.log_title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(
                    self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                    % (self.feed.log_title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'Not an RSS feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values

        # the feed has changed (or it is the first time we parse it)
        # saving the etag and last_modified fields
        original_etag = self.feed.etag
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (though it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''
        if self.feed.etag != original_etag:
            self.feed.save(update_fields=['etag'])

        original_last_modified = self.feed.last_modified
        if hasattr(self.fpf, 'modified') and self.fpf.modified:
            try:
                self.feed.last_modified = datetime.datetime.strptime(
                    self.fpf.modified, '%a, %d %b %Y %H:%M:%S %Z')
            except Exception, e:
                self.feed.last_modified = None
                logging.debug("Broken mtime %s: %s" %
                              (self.feed.last_modified, e))
                pass
Example #44
0
# -*- encoding:utf-8 -*-
from utils.log import debug
from utils.abs import Singleton
from config import DRIVER
from selenium import webdriver


@Singleton
class WebDriver():
    driver=None

    def __init__(self):
        if self.driver is None:
            debug('webdriver -> initializing driver: %s' % DRIVER)
            if DRIVER=='Chrome':
                self.driver=webdriver.Chrome()
            else:
                self.driver=webdriver.Firefox()
        else:
            debug('webdriver -> %s driver already instantiated.' % DRIVER)

    def get_driver(self):
        debug('webdriver -> returning the current driver instance.')
        return self.driver
driver=WebDriver().get_driver()
debug('webdriver.py -> instantiated and returned the current driver instance: %s' % str(driver))
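
The Singleton decorator imported from utils.abs above is not shown in this example. As a rough, hypothetical sketch only (not the project's actual helper), such a decorator could cache one instance per class:

def Singleton(cls):
    # Hypothetical sketch: keep a single cached instance per decorated class
    # and return that same instance on every subsequent instantiation.
    _instances = {}

    def _get_instance(*args, **kwargs):
        if cls not in _instances:
            _instances[cls] = cls(*args, **kwargs)
        return _instances[cls]
    return _get_instance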
Example #45
0
    def fetch_youtube(self, address):
        username = None
        channel_id = None
        list_id = None

        if 'gdata.youtube.com' in address:
            try:
                username_groups = re.search(
                    'gdata.youtube.com/feeds/\w+/users/(\w+)/', address)
                if not username_groups:
                    return
                username = username_groups.group(1)
            except IndexError:
                return
        elif 'youtube.com/feeds/videos.xml?user=' in address:
            try:
                username = urlparse.parse_qs(
                    urlparse.urlparse(address).query)['user'][0]
            except IndexError:
                return
        elif 'youtube.com/feeds/videos.xml?channel_id=' in address:
            try:
                channel_id = urlparse.parse_qs(
                    urlparse.urlparse(address).query)['channel_id'][0]
            except (IndexError, KeyError):
                return
        elif 'youtube.com/playlist' in address:
            try:
                list_id = urlparse.parse_qs(
                    urlparse.urlparse(address).query)['list'][0]
            except IndexError:
                return
        elif 'youtube.com/feeds/videos.xml?playlist_id' in address:
            try:
                list_id = urlparse.parse_qs(
                    urlparse.urlparse(address).query)['playlist_id'][0]
            except IndexError:
                return

        if channel_id:
            video_ids_xml = requests.get(
                "https://www.youtube.com/feeds/videos.xml?channel_id=%s" %
                channel_id,
                verify=False)
            channel_json = requests.get(
                "https://www.googleapis.com/youtube/v3/channels?part=snippet&id=%s&key=%s"
                % (channel_id, settings.YOUTUBE_API_KEY))
            channel = json.decode(channel_json.content)
            try:
                username = channel['items'][0]['snippet']['title']
                description = channel['items'][0]['snippet']['description']
            except (IndexError, KeyError):
                return
        elif list_id:
            playlist_json = requests.get(
                "https://www.googleapis.com/youtube/v3/playlists?part=snippet&id=%s&key=%s"
                % (list_id, settings.YOUTUBE_API_KEY))
            playlist = json.decode(playlist_json.content)
            try:
                username = playlist['items'][0]['snippet']['title']
                description = playlist['items'][0]['snippet']['description']
            except (IndexError, KeyError):
                return
            channel_url = "https://www.youtube.com/playlist?list=%s" % list_id
        elif username:
            video_ids_xml = requests.get(
                "https://www.youtube.com/feeds/videos.xml?user=%s" % username,
                verify=False)
            description = "YouTube videos uploaded by %s" % username
        else:
            return

        if list_id:
            playlist_json = requests.get(
                "https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&playlistId=%s&key=%s"
                % (list_id, settings.YOUTUBE_API_KEY))
            playlist = json.decode(playlist_json.content)
            try:
                video_ids = [
                    video['snippet']['resourceId']['videoId']
                    for video in playlist['items']
                ]
            except (IndexError, KeyError):
                return
        else:
            if video_ids_xml.status_code != 200:
                return
            video_ids_soup = BeautifulSoup(video_ids_xml.content)
            channel_url = video_ids_soup.find('author').find('uri').getText()
            video_ids = []
            for video_id in video_ids_soup.findAll('yt:videoid'):
                video_ids.append(video_id.getText())

        videos_json = requests.get(
            "https://www.googleapis.com/youtube/v3/videos?part=contentDetails%%2Csnippet&id=%s&key=%s"
            % (','.join(video_ids), settings.YOUTUBE_API_KEY))
        videos = json.decode(videos_json.content)
        if 'error' in videos:
            logging.debug(" ***> ~FRYoutube returned an error: ~FM~SB%s" %
                          (videos))
            return

        data = {}
        data['title'] = ("%s's YouTube Videos" %
                         username if 'Uploads' not in username else username)
        data['link'] = channel_url
        data['description'] = description
        data['lastBuildDate'] = datetime.datetime.utcnow()
        data[
            'generator'] = 'NewsBlur YouTube API v3 Decrapifier - %s' % settings.NEWSBLUR_URL
        data['docs'] = None
        data['feed_url'] = address
        rss = feedgenerator.Atom1Feed(**data)

        for video in videos['items']:
            thumbnail = video['snippet']['thumbnails'].get('maxres')
            if not thumbnail:
                thumbnail = video['snippet']['thumbnails'].get('high')
            if not thumbnail:
                thumbnail = video['snippet']['thumbnails'].get('medium')
            duration_sec = isodate.parse_duration(
                video['contentDetails']['duration']).seconds
            if duration_sec >= 3600:
                hours = (duration_sec / 3600)
                minutes = (duration_sec - (hours * 3600)) / 60
                seconds = duration_sec - (hours * 3600) - (minutes * 60)
                duration = "%s:%s:%s" % (hours, '{0:02d}'.format(minutes),
                                         '{0:02d}'.format(seconds))
            else:
                minutes = duration_sec / 60
                seconds = duration_sec - (minutes * 60)
                duration = "%s:%s" % ('{0:02d}'.format(minutes),
                                      '{0:02d}'.format(seconds))
            content = """<div class="NB-youtube-player"><iframe allowfullscreen="true" src="%s?iv_load_policy=3"></iframe></div>
                         <div class="NB-youtube-stats"><small>
                             <b>From:</b> <a href="%s">%s</a><br />
                             <b>Duration:</b> %s<br />
                         </small></div><hr>
                         <div class="NB-youtube-description">%s</div>
                         <img src="%s" style="display:none" />""" % (
                ("https://www.youtube.com/embed/" + video['id']),
                channel_url,
                username,
                duration,
                linkify(linebreaks(video['snippet']['description'])),
                thumbnail['url'] if thumbnail else "",
            )

            link = "http://www.youtube.com/watch?v=%s" % video['id']
            story_data = {
                'title': video['snippet']['title'],
                'link': link,
                'description': content,
                'author_name': username,
                'categories': [],
                'unique_id': "tag:youtube.com,2008:video:%s" % video['id'],
                'pubdate':
                dateutil.parser.parse(video['snippet']['publishedAt']),
            }
            rss.add_item(**story_data)

        return rss.writeString('utf-8')
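
The hours/minutes/seconds arithmetic in fetch_youtube above can be expressed more compactly with divmod. The helper below is only an illustrative, hypothetical rewrite of that step, not part of the example:

import isodate

def format_duration(iso_duration):
    # Turn an ISO 8601 duration such as 'PT1H2M3S' into '1:02:03'
    # (or '2:03' when under an hour), mirroring the logic above.
    total = int(isodate.parse_duration(iso_duration).total_seconds())
    hours, remainder = divmod(total, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours:
        return '%d:%02d:%02d' % (hours, minutes, seconds)
    return '%d:%02d' % (minutes, seconds)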
Example #46
0
    def fetch(self):
        """ 
        Uses requests to download the feed, parsing it in feedparser. Will be storified later.
        """
        start = time.time()
        identity = self.get_identity()
        log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
            identity, self.feed.log_title[:30], self.feed.id,
            datetime.datetime.now() - self.feed.last_update)
        logging.debug(log_msg)

        etag = self.feed.etag
        modified = self.feed.last_modified.utctimetuple(
        )[:7] if self.feed.last_modified else None
        address = self.feed.feed_address

        if (self.options.get('force') or random.random() <= .01):
            self.options['force'] = True
            modified = None
            etag = None
            address = qurl(address, add={"_": random.randint(0, 10000)})
            logging.debug(u'   ---> [%-30s] ~FBForcing fetch: %s' %
                          (self.feed.log_title[:30], address))
        elif (not self.feed.fetched_once or not self.feed.known_good):
            modified = None
            etag = None

        if self.options.get('feed_xml'):
            logging.debug(
                u'   ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s'
                %
                (self.feed.log_title[:30], len(self.options.get('feed_xml'))))

        if self.options.get('fpf'):
            self.fpf = self.options.get('fpf')
            logging.debug(
                u'   ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.'
                % (self.feed.log_title[:30]))
            return FEED_OK, self.fpf

        if 'youtube.com' in address:
            try:
                youtube_feed = self.fetch_youtube(address)
            except (requests.adapters.ConnectionError):
                youtube_feed = None
            if not youtube_feed:
                logging.debug(u'   ***> [%-30s] ~FRYouTube fetch failed: %s.' %
                              (self.feed.log_title[:30], address))
                return FEED_ERRHTTP, None
            self.fpf = feedparser.parse(youtube_feed)
        elif re.match('(https?)?://twitter.com/\w+/?$',
                      qurl(address, remove=['_'])):
            twitter_feed = self.fetch_twitter(address)
            if not twitter_feed:
                logging.debug(u'   ***> [%-30s] ~FRTwitter fetch failed: %s' %
                              (self.feed.log_title[:30], address))
                return FEED_ERRHTTP, None
            self.fpf = feedparser.parse(twitter_feed)

        if not self.fpf:
            try:
                headers = self.feed.fetch_headers()
                if etag:
                    headers['If-None-Match'] = etag
                if modified:
                    # format into an RFC 1123-compliant timestamp. We can't use
                    # time.strftime() since the %a and %b directives can be affected
                    # by the current locale, but RFC 2616 states that dates must be
                    # in English.
                    short_weekdays = [
                        'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'
                    ]
                    months = [
                        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                        'Sep', 'Oct', 'Nov', 'Dec'
                    ]
                    modified_header = '%s, %02d %s %04d %02d:%02d:%02d GMT' % (
                        short_weekdays[modified[6]], modified[2],
                        months[modified[1] - 1], modified[0], modified[3],
                        modified[4], modified[5])
                    headers['If-Modified-Since'] = modified_header
                if etag or modified:
                    headers['A-IM'] = 'feed'
                raw_feed = requests.get(address, headers=headers)
                if raw_feed.status_code >= 400:
                    logging.debug(
                        "   ***> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s"
                        % (self.feed.log_title[:30], raw_feed.status_code,
                           raw_feed.headers))
                    raw_feed = requests.get(
                        address, headers=self.feed.fetch_headers(fake=True))

                if raw_feed.content and 'application/json' in raw_feed.headers.get(
                        'Content-Type', ""):
                    # JSON Feed
                    json_feed = self.fetch_json_feed(address, raw_feed)
                    if not json_feed:
                        logging.debug(
                            u'   ***> [%-30s] ~FRJSON fetch failed: %s' %
                            (self.feed.log_title[:30], address))
                        return FEED_ERRHTTP, None
                    self.fpf = feedparser.parse(json_feed)
                elif raw_feed.content and raw_feed.status_code < 400:
                    response_headers = raw_feed.headers
                    response_headers['Content-Location'] = raw_feed.url
                    self.raw_feed = smart_unicode(raw_feed.content)
                    self.fpf = feedparser.parse(
                        self.raw_feed, response_headers=response_headers)
                    if self.options.get('debug', False):
                        logging.debug(
                            " ---> [%-30s] ~FBFeed fetch status %s: %s length / %s"
                            % (self.feed.log_title[:30], raw_feed.status_code,
                               len(smart_unicode(
                                   raw_feed.content)), raw_feed.headers))
            except Exception, e:
                logging.debug(
                    "   ***> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s"
                    % (self.feed.log_title[:30], unicode(e)[:100]))

            if not self.fpf or self.options.get('force_fp', False):
                try:
                    self.fpf = feedparser.parse(address,
                                                agent=self.feed.user_agent,
                                                etag=etag,
                                                modified=modified)
                except (TypeError, ValueError, KeyError, EOFError,
                        MemoryError), e:
                    logging.debug(u'   ***> [%-30s] ~FRFeed fetch error: %s' %
                                  (self.feed.log_title[:30], e))
                    pass
Example #47
0
 def adjust_crush_tunables(self):
     log.info("Adjust Crush Tunables")
     self.adjust_crush = "ceph osd crush tunables optimal"
     log.debug(self.adjust_crush)
     os.system(self.adjust_crush)
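
The os.system call above ignores the command's exit status. A minimal alternative sketch using subprocess (an assumption about intent, not the example's code) would make a failing ceph command raise instead of passing silently:

import subprocess

def adjust_crush_tunables():
    # Run the same ceph command, but raise CalledProcessError on a
    # non-zero exit status instead of ignoring it.
    subprocess.check_call(['ceph', 'osd', 'crush', 'tunables', 'optimal'])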
Example #48
0
class ProcessFeed:
    def __init__(self, feed_id, fpf, options, raw_feed=None):
        self.feed_id = feed_id
        self.options = options
        self.fpf = fpf
        self.raw_feed = raw_feed

    def refresh_feed(self):
        self.feed = Feed.get_by_id(self.feed_id)
        if self.feed_id != self.feed.pk:
            logging.debug(" ***> Feed has changed: from %s to %s" %
                          (self.feed_id, self.feed.pk))
            self.feed_id = self.feed.pk

    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()

        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                        % (self.feed.log_title[:30], self.fpf.bozo_exception,
                           len(self.fpf.entries)))

            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            # 302 and 307: Temporary redirect: ignore
            # 301 and 308: Permanent redirect: save it (after 10 tries)
            if self.fpf.status == 301 or self.fpf.status == 308:
                if self.fpf.href.endswith('feedburner.com/atom.xml'):
                    return FEED_ERRHTTP, ret_values
                redirects, non_redirects = self.feed.count_redirects_in_history(
                    'feed')
                self.feed.save_feed_history(
                    self.fpf.status,
                    "HTTP Redirect (%d to go)" % (10 - len(redirects)))
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    address = self.fpf.href
                    if self.options['force'] and address:
                        address = qurl(address, remove=['_'])
                    self.feed.feed_address = address
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug(
                        "   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                        % (self.feed.log_title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                    % (self.feed.log_title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf:
            logging.debug(
                "   ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!"
                % (self.feed.log_title[:30]))
            self.feed.save_feed_history(551, "Broken feed")
            return FEED_ERRHTTP, ret_values

        if self.fpf and not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                            feedparser.NonXMLContentType):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                    % (self.feed.log_title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(
                    self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                    % (self.feed.log_title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'Not an RSS feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values

        # the feed has changed (or it is the first time we parse it)
        # saving the etag and last_modified fields
        original_etag = self.feed.etag
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (though it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''
        if self.feed.etag != original_etag:
            self.feed.save(update_fields=['etag'])

        original_last_modified = self.feed.last_modified
        if hasattr(self.fpf, 'modified') and self.fpf.modified:
            try:
                self.feed.last_modified = datetime.datetime.strptime(
                    self.fpf.modified, '%a, %d %b %Y %H:%M:%S %Z')
            except Exception, e:
                self.feed.last_modified = None
                logging.debug("Broken mtime %s: %s" %
                              (self.feed.last_modified, e))
                pass
        if self.feed.last_modified != original_last_modified:
            self.feed.save(update_fields=['last_modified'])

        self.fpf.entries = self.fpf.entries[:100]

        original_title = self.feed.feed_title
        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        if self.feed.feed_title != original_title:
            self.feed.save(update_fields=['feed_title'])

        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            original_tagline = self.feed.data.feed_tagline
            self.feed.data.feed_tagline = smart_unicode(tagline)
            if self.feed.data.feed_tagline != original_tagline:
                self.feed.data.save(update_fields=['feed_tagline'])

        if not self.feed.feed_link_locked:
            new_feed_link = self.fpf.feed.get('link') or self.fpf.feed.get(
                'id') or self.feed.feed_link
            if self.options['force'] and new_feed_link:
                new_feed_link = qurl(new_feed_link, remove=['_'])
            if new_feed_link != self.feed.feed_link:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed's page is different: %s to %s"
                    % (self.feed.log_title[:30], self.feed.feed_link,
                       new_feed_link))
                redirects, non_redirects = self.feed.count_redirects_in_history(
                    'page')
                self.feed.save_page_history(
                    301, "HTTP Redirect (%s to go)" % (10 - len(redirects)))
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    self.feed.feed_link = new_feed_link
                    self.feed.save(update_fields=['feed_link'])

        # Determine if stories aren't valid and replace broken guids
        guids_seen = set()
        permalinks_seen = set()
        for entry in self.fpf.entries:
            guids_seen.add(entry.get('guid'))
            permalinks_seen.add(Feed.get_permalink(entry))
        guid_difference = len(guids_seen) != len(self.fpf.entries)
        single_guid = len(guids_seen) == 1
        replace_guids = single_guid and guid_difference
        permalink_difference = len(permalinks_seen) != len(self.fpf.entries)
        single_permalink = len(permalinks_seen) == 1
        replace_permalinks = single_permalink and permalink_difference

        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_hashes = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry, self.fpf.encoding)
            if story.get('published') < start_date:
                start_date = story.get('published')
            if replace_guids:
                if replace_permalinks:
                    new_story_guid = unicode(story.get('published'))
                    if self.options['verbose']:
                        logging.debug(
                            u'   ---> [%-30s] ~FBReplacing guid (%s) with timestamp: %s'
                            % (self.feed.log_title[:30], story.get('guid'),
                               new_story_guid))
                    story['guid'] = new_story_guid
                else:
                    new_story_guid = Feed.get_permalink(story)
                    if self.options['verbose']:
                        logging.debug(
                            u'   ---> [%-30s] ~FBReplacing guid (%s) with permalink: %s'
                            % (self.feed.log_title[:30], story.get('guid'),
                               new_story_guid))
                    story['guid'] = new_story_guid
            story['story_hash'] = MStory.feed_guid_hash_unsaved(
                self.feed.pk, story.get('guid'))
            stories.append(story)
            story_hashes.append(story.get('story_hash'))

        original_story_hash_count = len(story_hashes)
        story_hashes_in_unread_cutoff = self.feed.story_hashes_in_unread_cutoff[:
                                                                                original_story_hash_count]
        story_hashes.extend(story_hashes_in_unread_cutoff)
        story_hashes = list(set(story_hashes))
        if self.options['verbose'] or settings.DEBUG:
            logging.debug(
                u'   ---> [%-30s] ~FBFound ~SB%s~SN guids, adding ~SB%s~SN/%s guids from db'
                % (self.feed.log_title[:30], original_story_hash_count,
                   len(story_hashes) - original_story_hash_count,
                   len(story_hashes_in_unread_cutoff)))

        existing_stories = dict((s.story_hash, s) for s in MStory.objects(
            story_hash__in=story_hashes,
            # story_date__gte=start_date,
            # story_feed_id=self.feed.pk
        ))
        # if len(existing_stories) == 0:
        #     existing_stories = dict((s.story_hash, s) for s in MStory.objects(
        #         story_date__gte=start_date,
        #         story_feed_id=self.feed.pk
        #     ))

        ret_values = self.feed.add_update_stories(
            stories,
            existing_stories,
            verbose=self.options['verbose'],
            updates_off=self.options['updates_off'])

        # PubSubHubbub
        if (hasattr(self.fpf, 'feed') and hasattr(self.fpf.feed, 'links')
                and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now(
                    )
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            if (hub_url and self_url and not settings.DEBUG
                    and self.feed.active_subscribers > 0
                    and (push_expired or not self.feed.is_push
                         or self.options.get('force'))):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' %
                    (self.feed.log_title[:30],
                     "~SKRe-~SN" if push_expired else "", hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url,
                                                       feed=self.feed,
                                                       hub=hub_url)
                except TimeoutError:
                    logging.debug(
                        u'   ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s'
                        % (self.feed.log_title[:30], hub_url))
            elif (self.feed.is_push
                  and (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' %
                    (self.feed.log_title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()

        # Push notifications
        if ret_values['new'] > 0 and MUserFeedNotification.feed_has_users(
                self.feed.pk) > 0:
            QueueNotifications.delay(self.feed.pk, ret_values['new'])

        # All Done
        logging.debug(
            u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s'
            % (self.feed.log_title[:30], '~FG~SB' if ret_values['new'] else '',
               ret_values['new'], '~FY~SB' if ret_values['updated'] else '',
               ret_values['updated'], '~SB' if ret_values['same'] else '',
               ret_values['same'], '~FR~SB' if ret_values['error'] else '',
               ret_values['error'], len(self.fpf.entries)))
        self.feed.update_all_statistics(has_new_stories=bool(
            ret_values['new']),
                                        force=self.options['force'])
        fetch_date = datetime.datetime.now()
        if ret_values['new']:
            if not getattr(settings, 'TEST_DEBUG', False):
                self.feed.trim_feed()
                self.feed.expire_redis()
            if MStatistics.get('raw_feed', None) == self.feed.pk:
                self.feed.save_raw_feed(self.raw_feed, fetch_date)
        self.feed.save_feed_history(200, "OK", date=fetch_date)

        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' %
                          (self.feed.log_title[:30], time.time() - start))

        return FEED_OK, ret_values
Example #49
0
    async def new_proxy(self, item):
        key = build_key(item)

        logger.debug('Got proxy: %s' % item)

        return await self.cli.hmset_dict(key, item)
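
new_proxy above depends on an aioredis client (self.cli) and a build_key helper that are not shown. The sketch below is a self-contained, hypothetical way to drive such a coroutine, assuming the aioredis 1.x API and a made-up build_key:

import asyncio
import aioredis

def build_key(item):
    # Hypothetical stand-in for the example's build_key helper.
    return 'proxy:%s:%s' % (item.get('host'), item.get('port'))

async def store_proxy(item):
    # Store the proxy dict as a redis hash, as new_proxy does above.
    cli = await aioredis.create_redis_pool('redis://localhost')
    try:
        return await cli.hmset_dict(build_key(item), item)
    finally:
        cli.close()
        await cli.wait_closed()

asyncio.get_event_loop().run_until_complete(
    store_proxy({'host': '127.0.0.1', 'port': '8080', 'scheme': 'http'}))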
Example #50
0
            if not self.fpf or self.options.get('force_fp', False):
                try:
                    self.fpf = feedparser.parse(address,
                                                agent=self.feed.user_agent,
                                                etag=etag,
                                                modified=modified)
                except (TypeError, ValueError, KeyError, EOFError,
                        MemoryError), e:
                    logging.debug(u'   ***> [%-30s] ~FRFeed fetch error: %s' %
                                  (self.feed.log_title[:30], e))
                    pass

        if not self.fpf:
            try:
                logging.debug(u'   ***> [%-30s] ~FRTurning off headers...' %
                              (self.feed.log_title[:30]))
                self.fpf = feedparser.parse(address,
                                            agent=self.feed.user_agent)
            except (TypeError, ValueError, KeyError, EOFError, MemoryError), e:
                logging.debug(u'   ***> [%-30s] ~FRFetch failed: %s.' %
                              (self.feed.log_title[:30], e))
                return FEED_ERRHTTP, None

        logging.debug(u'   ---> [%-30s] ~FYFeed fetch in ~FM%.4ss' %
                      (self.feed.log_title[:30], time.time() - start))

        return FEED_OK, self.fpf

    def get_identity(self):
        identity = "X"
Example #51
0
    def get_crushtype_id(self):

        log.debug('api testing with each type in the crush type')
        log.debug('****************************************')

        for each_id in self.json_crush_node:
            api = self.construct_api() + '/' + str(each_id['id'])
            log.debug('config with id %s' % str(each_id['id']))
            log.debug('api: %s' % api)

            response = self.auth.request('GET', api, verify=False)
            response.raise_for_status()
            log.debug('response: \n %s' % response.json())

            pretty_response = json.dumps(response.json(), indent=2)
            log.debug('pretty json response \n %s' % pretty_response)
Example #52
0
 def run(self, **kwargs):
     logging.debug(" ---> Sharing popular stories...")
     MSharedStory.share_popular_stories(interactive=False)
Example #53
0
 social_services = None
 if self.options.get('requesting_user_id', None):
     social_services = MSocialServices.get_user(
         self.options.get('requesting_user_id'))
     try:
         twitter_api = social_services.twitter_api()
     except tweepy.error.TweepError, e:
         logging.debug(
             u'   ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
             (self.feed.log_title[:30], self.address, e))
         return
 else:
     usersubs = UserSubscription.objects.filter(feed=self.feed)
     if not usersubs:
         logging.debug(
             u'   ***> [%-30s] ~FRTwitter fetch failed: %s: No subscriptions'
             % (self.feed.log_title[:30], self.address))
         return
     for sub in usersubs:
         social_services = MSocialServices.get_user(sub.user_id)
         if not social_services.twitter_uid: continue
         try:
             twitter_api = social_services.twitter_api()
             if not twitter_api:
                 continue
             else:
                 break
         except tweepy.error.TweepError, e:
             logging.debug(
                 u'   ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
                 (self.feed.log_title[:30], self.address, e))
Example #54
0
    def run(self, **kwargs):
        from apps.rss_feeds.models import Feed
        settings.LOG_TO_STREAM = True
        now = datetime.datetime.utcnow()
        start = time.time()
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)

        logging.debug(" ---> ~SN~FBQueuing broken feeds...")

        # Force refresh feeds
        refresh_feeds = Feed.objects.filter(
            active=True, fetched_once=False,
            active_subscribers__gte=1).order_by('?')[:100]
        refresh_count = refresh_feeds.count()
        cp1 = time.time()

        logging.debug(" ---> ~SN~FBFound %s active, unfetched broken feeds" %
                      refresh_count)

        # Mistakenly inactive feeds
        hours_ago = (now - datetime.timedelta(minutes=10)).strftime('%s')
        old_tasked_feeds = r.zrangebyscore('tasked_feeds', 0, hours_ago)
        inactive_count = len(old_tasked_feeds)
        if inactive_count:
            r.zremrangebyscore('tasked_feeds', 0, hours_ago)
            # r.sadd('queued_feeds', *old_tasked_feeds)
            for feed_id in old_tasked_feeds:
                r.zincrby('error_feeds', feed_id, 1)
                feed = Feed.get_by_id(feed_id)
                feed.set_next_scheduled_update()
        logging.debug(
            " ---> ~SN~FBRe-queuing ~SB%s~SN dropped/broken feeds (~SB%s/%s~SN queued/tasked)"
            %
            (inactive_count, r.scard('queued_feeds'), r.zcard('tasked_feeds')))
        cp2 = time.time()

        old = now - datetime.timedelta(days=1)
        old_feeds = Feed.objects.filter(
            next_scheduled_update__lte=old,
            active_subscribers__gte=1).order_by('?')[:500]
        old_count = old_feeds.count()
        cp3 = time.time()

        logging.debug(
            " ---> ~SN~FBTasking ~SBrefresh:~FC%s~FB inactive:~FC%s~FB old:~FC%s~SN~FB broken feeds... (%.4s/%.4s/%.4s)"
            % (
                refresh_count,
                inactive_count,
                old_count,
                cp1 - start,
                cp2 - cp1,
                cp3 - cp2,
            ))

        Feed.task_feeds(refresh_feeds, verbose=False)
        Feed.task_feeds(old_feeds, verbose=False)

        logging.debug(
            " ---> ~SN~FBTasking broken feeds took ~SB%s~SN seconds (~SB%s~SN/~FG%s~FB~SN/%s tasked/queued/scheduled)"
            % (int((time.time() - start)), r.zcard('tasked_feeds'),
               r.scard('queued_feeds'), r.zcard('scheduled_updates')))
Example #55
0
 def construct_api(self):
     self.api = self.base_api + self.fsid + "/" + "crush_map"
     log.debug(self.api)
     return self.api
Example #56
0
 def construct_api(self):
     self.api = self.base_api + self.fsid + '/' + 'crush_type'
     log.debug(self.api)
     return self.api
Example #57
0
        'q': 0,
        '$d': 0
    }
    len_text = len(text_to_check)

    for i in range(0, len_text - 1):
        if text_to_check[i] == '<' and text_to_check[i + 1] == '<':
            count['<>'] += 1
        elif text_to_check[i] == '>' and text_to_check[i + 1] == '>':
            count['<>'] += 1
        elif text_to_check[i] == '|' and text_to_check[i + 1] == 'c':
            count['c'] += 1
        elif text_to_check[i] == '|' and text_to_check[i + 1] == 't':
            count['t'] += 1
        elif text_to_check[i] == '\\' and text_to_check[i + 1] == '\\':
            count['bs'] += 1
        # elif text_to_check[i] == '\\' and text_to_check[i+1] == 'n':
        #     count['n'] += 1
        elif text_to_check[i] == '\\' and text_to_check[i + 1] == '"':
            count['q'] += 1
        elif text_to_check[i] == '$' and text_to_check[i + 1] == 'd':
            count['$d'] += 1
    return count


if __name__ == '__main__':
    log.debug('main() with args: %s' % str(sys.argv))
    if os.name == 'nt':
        sys.stdout = open(1, 'w', encoding='utf-8', closefd=False)  # windows
    main()
Example #58
0
 def _run_process(self, args):
     log.debug('Running command: \'{}\''.format(' '.join(args)))
     process = Popen(args, stdout=PIPE)
     (output, err) = process.communicate()
     return process.wait(), output, err
Example #59
0
class FetchFeed:
    def __init__(self, feed_id, options):
        self.feed = Feed.get_by_id(feed_id)
        self.options = options
        self.fpf = None

    @timelimit(150)
    def fetch(self):
        """ 
        Uses feedparser to download the feed. Will be parsed later.
        """
        start = time.time()
        identity = self.get_identity()
        log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
            identity, self.feed.title[:30], self.feed.id,
            datetime.datetime.now() - self.feed.last_update)
        logging.debug(log_msg)

        etag = self.feed.etag
        modified = self.feed.last_modified.utctimetuple(
        )[:7] if self.feed.last_modified else None
        address = self.feed.feed_address

        if (self.options.get('force') or random.random() <= .01):
            modified = None
            etag = None
            address = cache_bust_url(address)
            logging.debug(u'   ---> [%-30s] ~FBForcing fetch: %s' %
                          (self.feed.title[:30], address))
        elif (not self.feed.fetched_once or not self.feed.known_good):
            modified = None
            etag = None

        USER_AGENT = ('NewsBlur Feed Fetcher - %s subscriber%s - %s '
                      '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                      'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                      'Safari/534.48.3)' % (
                          self.feed.num_subscribers,
                          's' if self.feed.num_subscribers != 1 else '',
                          self.feed.permalink,
                      ))
        if self.options.get('feed_xml'):
            logging.debug(
                u'   ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s'
                % (self.feed.title[:30], len(self.options.get('feed_xml'))))

        if self.options.get('fpf'):
            self.fpf = self.options.get('fpf')
            logging.debug(
                u'   ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.'
                % (self.feed.title[:30]))
            return FEED_OK, self.fpf

        try:
            self.fpf = feedparser.parse(address,
                                        agent=USER_AGENT,
                                        etag=etag,
                                        modified=modified)
        except (TypeError, ValueError, KeyError), e:
            logging.debug(u'   ***> [%-30s] ~FR%s, turning off headers.' %
                          (self.feed.title[:30], e))
            self.fpf = feedparser.parse(address, agent=USER_AGENT)

        logging.debug(u'   ---> [%-30s] ~FYFeed fetch in ~FM%.4ss' %
                      (self.feed.title[:30], time.time() - start))

        return FEED_OK, self.fpf
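
The @timelimit(150) decorator on FetchFeed.fetch above is not defined in this snippet. The following is only a guess at a minimal signal-based version (Unix, main thread only), not NewsBlur's actual utility:

import signal

class TimeoutError(Exception):
    pass

def timelimit(seconds):
    # Hypothetical sketch: abort the wrapped call with TimeoutError once it
    # has run longer than `seconds`, using SIGALRM.
    def decorator(func):
        def wrapper(*args, **kwargs):
            def handler(signum, frame):
                raise TimeoutError('timed out after %s seconds' % seconds)
            previous = signal.signal(signal.SIGALRM, handler)
            signal.alarm(seconds)
            try:
                return func(*args, **kwargs)
            finally:
                signal.alarm(0)
                signal.signal(signal.SIGALRM, previous)
        return wrapper
    return decorator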
Example #60
0
    def test_order_closest(self):
        id0 = Id(BIN_ID0)
        ordered_list = [
            Id('\x00' * ID_SIZE_BYTES),
            Id(BIN_ID0[:-1] + '\x06'),
            Id(BIN_ID0[:9] + '\x01' * (ID_SIZE_BYTES - 9)),
            Id(BIN_ID0[:7] + '\xff' * (ID_SIZE_BYTES - 7)),
            Id(BIN_ID0[:7] + '\xff' * (ID_SIZE_BYTES - 7)),
            Id('\x00' + '\xff' * (ID_SIZE_BYTES - 1)),
            Id('\x53' * ID_SIZE_BYTES),
            Id('\xff' * ID_SIZE_BYTES),
        ]
        random_list = random.sample(ordered_list, len(ordered_list))

        random_list_copy = random_list[:]

        log.debug('ordered list')
        for e in ordered_list:
            log.debug('%s' % e)
        log.debug('random order')
        for e in random_list:
            log.debug('%s' % e)

        result_list = id0.order_closest(random_list)
        log.debug('order_closest result')
        for e in result_list:
            log.debug('%s' % e)
        log.debug('random order (it should not change)')
        for e in random_list:
            log.debug('%s' % e)

        # make sure order_closest does not modify random_list
        assert random_list == random_list_copy

        for i, ordered_id in enumerate(ordered_list):
            log.debug('%d, %s, %s' % (i, ordered_id, result_list[i]))
            assert ordered_id.bin_id == result_list[i].bin_id
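
For context, the ordering this test checks is presumably Kademlia-style XOR distance to the target id. A rough sketch of that semantic (an assumption about Id.order_closest, not the library's code), in the same Python 2 style as the test:

def order_closest(target_bin_id, bin_ids):
    # Sort binary ids by XOR distance to the target, returning a new list
    # and leaving the input list untouched (as the test asserts).
    target = int(target_bin_id.encode('hex'), 16)
    return sorted(bin_ids, key=lambda b: int(b.encode('hex'), 16) ^ target)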