def cross_sync(increment):
    '''
    Copy legacy ``NuOutboundWrapperMap`` rows into the ``NuReleaseItem`` /
    ``NuResolvedOutbound`` tables.

    Every legacy row (newest id first) is matched to a ``NuReleaseItem`` by
    its ``outbound_wrapper``; a release item is created when none exists.
    A ``NuResolvedOutbound`` child is then attached unless the release item
    already has one with the same (client_id, client_key, actual_target).

    Progress is reported with ``print``.

    Args:
        increment: Commit as soon as more than this many new objects have
            been staged since the previous commit.

    Raises:
        Exception: Any failure is re-raised after the session has been
            rolled back.
    '''
    sess = db.get_db_session()

    print("Loading extant rows...")
    old_nu_items = sess.query(db.NuOutboundWrapperMap) \
        .order_by(desc(db.NuOutboundWrapperMap.id)) \
        .all()
    print("Loaded. Processing")

    have_count = 0   # legacy rows whose resolved entry already existed
    new_count = 0    # objects created (release items + resolved entries)
    loops = 0        # objects staged since the last commit
    nc_loops = 0     # no-change rows seen since the last progress line

    try:
        for old_nu in old_nu_items:
            # The relationship is passed as a class-bound attribute rather
            # than the string 'resolved': string arguments to loader options
            # are deprecated in SQLAlchemy 1.4 and removed in 2.0.
            have = sess.query(db.NuReleaseItem) \
                .options(joinedload(db.NuReleaseItem.resolved)) \
                .filter(db.NuReleaseItem.outbound_wrapper == old_nu.outbound_wrapper) \
                .scalar()

            if not have:
                have = db.NuReleaseItem(
                    validated=old_nu.validated,
                    seriesname=old_nu.seriesname,
                    releaseinfo=old_nu.releaseinfo,
                    groupinfo=old_nu.groupinfo,
                    referrer=old_nu.referrer,
                    outbound_wrapper=old_nu.outbound_wrapper,
                    first_seen=old_nu.released_on,
                    actual_target=old_nu.actual_target,
                )
                sess.add(have)
                loops += 1
                new_count += 1

            old_key = (old_nu.client_id, old_nu.client_key, old_nu.actual_target)
            resolved = {
                (itm.client_id, itm.client_key, itm.actual_target)
                for itm in have.resolved
            }

            if old_key not in resolved:
                have.resolved.append(
                    db.NuResolvedOutbound(
                        client_id=old_nu.client_id,
                        client_key=old_nu.client_key,
                        actual_target=old_nu.actual_target,
                        fetched_on=old_nu.released_on,
                    )
                )
                loops += 1
                new_count += 1
            else:
                have_count += 1
                nc_loops += 1

            if loops > increment:
                print("Commit! Have {}, new {} ({}, {})".format(have_count, new_count, loops, nc_loops))
                sess.commit()
                loops = 0

            if nc_loops > 100:
                print("Have {}, new {} ({}, {})".format(have_count, new_count, loops, nc_loops))
                nc_loops = 0

        # Flush whatever is left over from the last partial batch.
        sess.commit()
    except Exception:
        sess.rollback()
        raise
def __addNewLinks(self, link_items):
    '''
    Insert a ``NuReleaseItem`` row for every release link whose
    ``outbound_wrapper`` is not stored yet.

    All items are first staged and committed as a single batch. If that
    raises one of the handled SQLAlchemy errors (``InvalidRequestError``,
    ``OperationalError``, ``IntegrityError``) the batch is rolled back and
    the items are retried one at a time, committing after each one. An item
    that still fails is logged with its traceback, rolled back and skipped,
    so one bad item can neither block the others nor make the call retry
    forever.

    link_items is an iterable of dicts with at least the keys
    'seriesname', 'releaseinfo', 'groupinfo', 'referrer' and
    'outbound_wrapper'.

    Example release sections:
    {
        'seriesname': 'Gate of Revelation',
        'releaseinfo': 'c203',
        'groupinfo': 'daoseekerblog',
        'referrer': 'http://www.novelupdates.com/',
        'outbound_wrapper': 'http://www.novelupdates.com/extnu/327682/',
        'actual_target': None
    },
    {
        'seriesname': 'Mai Kitsune Waifu',
        'releaseinfo': 'c174',
        'groupinfo': 'subudai11',
        'referrer': 'http://www.novelupdates.com/',
        'outbound_wrapper': 'http://www.novelupdates.com/extnu/327678/',
        'actual_target': None
    },
    {
        'seriesname': 'Mai Kitsune Waifu',
        'releaseinfo': 'c174',
        'groupinfo': 'subudai11',
        'referrer': 'http://www.novelupdates.com/',
        'outbound_wrapper': 'http://www.novelupdates.com/extnu/327674/',
        'actual_target': None
    }
    '''
    db_errors = (
        sqlalchemy.exc.InvalidRequestError,
        sqlalchemy.exc.OperationalError,
        sqlalchemy.exc.IntegrityError,
    )

    def stage_if_new(item):
        # Stage a row for `item` unless its wrapper URL is already stored.
        # Returns True when a new row was added to the session.
        have = self.db_sess.query(db.NuReleaseItem) \
            .filter(db.NuReleaseItem.outbound_wrapper == item['outbound_wrapper']) \
            .scalar()
        if have:
            return False

        self.log.info("New: '%s' -> '%s' : '%s'", item['seriesname'], item['releaseinfo'], item['groupinfo'])
        self.db_sess.add(
            db.NuReleaseItem(
                validated=False,
                reviewed='unverified',
                seriesname=item['seriesname'],
                releaseinfo=item['releaseinfo'],
                groupinfo=item['groupinfo'],
                referrer=item['referrer'],
                outbound_wrapper=item['outbound_wrapper'],
                first_seen=datetime.datetime.now(),
            )
        )
        return True

    try:
        # Fast path: one transaction for the whole page.
        new_count = 0
        for item in link_items:
            if stage_if_new(item):
                new_count += 1
        self.db_sess.commit()
    except db_errors:
        self.log.warning("Error when inserting items!")
        self.log.warning("Retrying with commit_each")
        self.db_sess.rollback()

        # Slow path: one transaction per item, so a failing item only
        # costs itself. Each item is attempted exactly once here, which
        # bounds the amount of work even when the error is permanent.
        new_count = 0
        for item in link_items:
            try:
                staged = stage_if_new(item)
                self.db_sess.commit()
            except db_errors:
                self.log.error("Error when inserting items!")
                for line in traceback.format_exc().strip().split("\n"):
                    self.log.error("%s", line.rstrip())
                self.db_sess.rollback()
                continue
            if staged:
                new_count += 1

    self.log.info("Found %s release links on page, %s of which were new!", len(link_items), new_count)
def __addNewLinks(self, base_url, link_items):
    '''
    Store new release links and try to resolve the ones still unverified.

    For every item the matching ``NuReleaseItem`` (by ``outbound_wrapper``)
    is looked up. A missing row is created; for an existing row the stored
    ``release_date`` is replaced when it exceeds the scraped one by more
    than ``days(2)``. Afterwards, rows that are 'unverified', have
    ``fetch_attempts == 0`` and at most 2 local fetch attempts are passed to
    ``self.__load_referrer(base_url, row)``; a failure there is logged, the
    session is rolled back and the row's ``local_fetch_attempts`` is bumped.

    All items are first processed in a single transaction. If that raises
    one of the handled SQLAlchemy errors (``InvalidRequestError``,
    ``OperationalError``, ``IntegrityError``) the batch is rolled back and
    the items are retried one at a time, committing after each one. An item
    that still fails is logged with its traceback, rolled back and skipped,
    so the call always terminates.

    base_url is forwarded unchanged to ``self.__load_referrer``.

    link_items is an iterable of dicts with at least the keys
    'seriesname', 'releaseinfo', 'groupinfo', 'referrer',
    'outbound_wrapper' and 'release_date'.

    Example release sections:
    {
        'seriesname': 'Gate of Revelation',
        'releaseinfo': 'c203',
        'groupinfo': 'daoseekerblog',
        'referrer': 'https://www.novelupdates.com/',
        'outbound_wrapper': 'https://www.novelupdates.com/extnu/327682/',
        'actual_target': None
    },
    {
        'seriesname': 'Mai Kitsune Waifu',
        'releaseinfo': 'c174',
        'groupinfo': 'subudai11',
        'referrer': 'https://www.novelupdates.com/',
        'outbound_wrapper': 'https://www.novelupdates.com/extnu/327678/',
        'actual_target': None
    },
    {
        'seriesname': 'Mai Kitsune Waifu',
        'releaseinfo': 'c174',
        'groupinfo': 'subudai11',
        'referrer': 'https://www.novelupdates.com/',
        'outbound_wrapper': 'https://www.novelupdates.com/extnu/327674/',
        'actual_target': None
    }
    '''
    db_errors = (
        sqlalchemy.exc.InvalidRequestError,
        sqlalchemy.exc.OperationalError,
        sqlalchemy.exc.IntegrityError,
    )

    def process_item(item, commit_each):
        # Create/update the row for `item`, then attempt to resolve it.
        # Returns True when a new row was created.
        have = self.db_sess.query(db.NuReleaseItem) \
            .filter(db.NuReleaseItem.outbound_wrapper == item['outbound_wrapper']) \
            .scalar()
        created = not have

        if created:
            self.log.info("New: '%s' -> '%s' : '%s'", item['seriesname'], item['releaseinfo'], item['groupinfo'])
            have = db.NuReleaseItem(
                validated=False,
                reviewed='unverified',
                seriesname=item['seriesname'].strip(),
                releaseinfo=item['releaseinfo'].strip(),
                groupinfo=item['groupinfo'].strip(),
                referrer=item['referrer'],
                outbound_wrapper=item['outbound_wrapper'],
                first_seen=datetime.datetime.now(),
                release_date=item['release_date'],
                fetch_attempts=0,
                local_fetch_attempts=0,
            )
            self.db_sess.add(have)
            if commit_each:
                self.db_sess.commit()
            self.mon_con.incr('new-urls', 1)
        else:
            # `days` is defined elsewhere in the file; presumably it
            # converts a day count to seconds -- confirm.
            delta = have.release_date - item['release_date']
            if delta.total_seconds() > days(2):
                self.log.info(
                    "Item release date looks invalid. Fixing (%s, %s)",
                    delta, delta.total_seconds())
                have.release_date = item['release_date']
                if commit_each:
                    self.db_sess.commit()

        # This shouldn't happen given the default value, but it is. Not sure how.
        try:
            int(have.local_fetch_attempts)
        except (TypeError, ValueError):
            have.local_fetch_attempts = 0

        should_resolve = (
            have.reviewed == 'unverified'
            and have.fetch_attempts == 0
            and have.local_fetch_attempts <= 2
        )
        if not should_resolve:
            return created

        try:
            self.__load_referrer(base_url, have)
        except Exception:
            self.log.info("Failure resolving item for '%s'", have.outbound_wrapper)
            self.log.info("TL Group: %s. Series %s, chap: %s", have.groupinfo, have.seriesname, have.releaseinfo)
            for line in traceback.format_exc().strip().split("\n"):
                self.log.error("%s", line.rstrip())

            # NOTE(review): when commit_each is off this rollback also
            # discards rows staged earlier in the same batch -- confirm
            # whether __load_referrer commits on its own.
            try:
                self.log.warning("Rolling back")
                self.db_sess.rollback()
            except Exception:
                self.log.critical(
                    "Failure in rollback for '%s'", have.outbound_wrapper)
                for line in traceback.format_exc().strip().split("\n"):
                    self.log.error("%s", line.rstrip())
            else:
                # Record the failed attempt only AFTER the rollback:
                # Session.rollback() discards pending attribute changes, so
                # incrementing before it would never reach the database.
                have.local_fetch_attempts = (have.local_fetch_attempts or 0) + 1

        return created

    try:
        # Fast path: one transaction for the whole page.
        new_count = 0
        for item in link_items:
            if process_item(item, commit_each=False):
                new_count += 1
        self.db_sess.commit()
    except db_errors:
        self.log.warning("Error when inserting items!")
        self.log.warning("Retrying with commit_each")
        self.db_sess.rollback()

        # Slow path: one transaction per item, so a failing item only
        # costs itself. Each item is attempted exactly once here, which
        # bounds the amount of work even when the error is permanent.
        new_count = 0
        for item in link_items:
            try:
                created = process_item(item, commit_each=True)
                self.db_sess.commit()
            except db_errors:
                self.log.error("Error when inserting items!")
                for line in traceback.format_exc().strip().split("\n"):
                    self.log.error("%s", line.rstrip())
                self.db_sess.rollback()
                continue
            if created:
                new_count += 1

    self.log.info("Found %s release links on page, %s of which were new!", len(link_items), new_count)