コード例 #1
0
def cross_sync(increment):
	"""
	Copy legacy ``NuOutboundWrapperMap`` rows into the ``NuReleaseItem`` /
	``NuResolvedOutbound`` tables.

	Every legacy row is loaded up front, highest ``id`` first. For each one a
	``NuReleaseItem`` with the same ``outbound_wrapper`` is looked up and
	created when missing; a ``NuResolvedOutbound`` child is then appended
	unless one with the same (client_id, client_key, actual_target) triple is
	already attached to that release item.

	increment: the session is committed each time more than this many
	           inserts have accumulated since the previous commit.

	Progress is printed at every intermediate commit and each time more than
	100 already-present rows have been seen since the last progress line.
	Any exception rolls the session back and is re-raised.
	"""
	session = db.get_db_session()
	print("Loading extant rows...")
	legacy_rows = (
		session.query(db.NuOutboundWrapperMap)
		.order_by(desc(db.NuOutboundWrapperMap.id))
		.all()
	)
	print("Loaded. Processing")

	# have_count / new_count are running totals for the whole run.
	# pending_writes and unchanged_streak are reset whenever their
	# respective threshold below trips.
	have_count = 0
	new_count = 0
	pending_writes = 0
	unchanged_streak = 0

	try:
		for legacy in legacy_rows:
			release_query = session.query(db.NuReleaseItem).options(joinedload('resolved'))
			release = release_query.filter(
				db.NuReleaseItem.outbound_wrapper == legacy.outbound_wrapper
			).scalar()

			if not release:
				# No release item for this wrapper URL yet: create one
				# from the legacy row's fields.
				release = db.NuReleaseItem(
					validated=legacy.validated,
					seriesname=legacy.seriesname,
					releaseinfo=legacy.releaseinfo,
					groupinfo=legacy.groupinfo,
					referrer=legacy.referrer,
					outbound_wrapper=legacy.outbound_wrapper,
					first_seen=legacy.released_on,
					actual_target=legacy.actual_target,
				)
				session.add(release)
				pending_writes += 1
				new_count += 1

			wanted = (legacy.client_id, legacy.client_key, legacy.actual_target)
			known = {
				(child.client_id, child.client_key, child.actual_target)
				for child in release.resolved
			}
			if wanted in known:
				have_count += 1
				unchanged_streak += 1
			else:
				resolution = db.NuResolvedOutbound(
					client_id=legacy.client_id,
					client_key=legacy.client_key,
					actual_target=legacy.actual_target,
					fetched_on=legacy.released_on,
				)
				release.resolved.append(resolution)
				pending_writes += 1
				new_count += 1

			# Flush accumulated inserts in batches.
			if pending_writes > increment:
				print("Commit! Have {}, new {} ({}, {})".format(have_count, new_count, pending_writes, unchanged_streak))
				session.commit()
				pending_writes = 0

			# Periodic progress output while skipping rows that already exist.
			if unchanged_streak > 100:
				print("Have {}, new {} ({}, {})".format(have_count, new_count, pending_writes, unchanged_streak))
				unchanged_streak = 0

		# Commit whatever is left over from the final partial batch.
		session.commit()
	except Exception:
		session.rollback()
		raise
コード例 #2
0
    def __addNewLinks(self, link_items):
        '''
        Insert a NuReleaseItem row for every entry of `link_items` whose
        'outbound_wrapper' URL is not already present in the database.

        The batch is first attempted as a single transaction. If SQLAlchemy
        raises InvalidRequestError, OperationalError or IntegrityError, the
        session is rolled back and the whole batch is attempted again, this
        time committing after each individual insert. The loop only exits
        once a pass over `link_items` ends with a successful commit.

        Example release sections:
        {
            'seriesname': 'Gate of Revelation',
            'releaseinfo': 'c203',
            'groupinfo': 'daoseekerblog',
            'referrer': 'http://www.novelupdates.com/',
            'outbound_wrapper': 'http://www.novelupdates.com/extnu/327682/',
            'actual_target': None
        }, {
            'seriesname': 'Mai Kitsune Waifu',
            'releaseinfo': 'c174',
            'groupinfo': 'subudai11',
            'referrer': 'http://www.novelupdates.com/',
            'outbound_wrapper': 'http://www.novelupdates.com/extnu/327678/',
            'actual_target': None
        }
        '''

        per_item_commit = False
        while True:
            # Restarted on every attempt, so the final log line reflects
            # only the pass that actually succeeded.
            inserted = 0
            try:
                for entry in link_items:
                    wrapper_url = entry['outbound_wrapper']
                    lookup = self.db_sess.query(db.NuReleaseItem).filter(
                        db.NuReleaseItem.outbound_wrapper == wrapper_url)
                    existing = lookup.scalar()
                    if existing:
                        continue

                    self.log.info("New: '%s' -> '%s' : '%s'",
                                  entry['seriesname'], entry['releaseinfo'],
                                  entry['groupinfo'])
                    fresh_row = db.NuReleaseItem(
                        validated=False,
                        reviewed='unverified',
                        seriesname=entry['seriesname'],
                        releaseinfo=entry['releaseinfo'],
                        groupinfo=entry['groupinfo'],
                        referrer=entry['referrer'],
                        outbound_wrapper=wrapper_url,
                        first_seen=datetime.datetime.now(),
                    )
                    self.db_sess.add(fresh_row)
                    inserted += 1
                    if per_item_commit:
                        self.db_sess.commit()

                self.db_sess.commit()
                break

            except (sqlalchemy.exc.InvalidRequestError,
                    sqlalchemy.exc.OperationalError,
                    sqlalchemy.exc.IntegrityError):
                # The first failure is only a warning (a retry follows);
                # failures while already committing per item are errors and
                # get the full traceback.
                emit = self.log.error if per_item_commit else self.log.warning
                emit("Error when inserting items!")
                if per_item_commit:
                    for tb_line in traceback.format_exc().strip().split("\n"):
                        emit("%s", tb_line.rstrip())
                else:
                    emit("Retrying with commit_each")

                self.db_sess.rollback()
                per_item_commit = True

        self.log.info("Found %s release links on page, %s of which were new!",
                      len(link_items), inserted)
コード例 #3
0
    def __addNewLinks(self, base_url, link_items):
        '''
        Record newly seen release links and try to resolve unverified ones.

        For every dict in `link_items`:
          * if no NuReleaseItem with the same 'outbound_wrapper' exists, a
            new unverified row is added and the 'new-urls' monitoring
            counter is incremented;
          * otherwise, if the stored release_date is more than days(2) later
            than the scraped one, the stored date is overwritten;
          * if the row is 'unverified', has fetch_attempts == 0 and
            local_fetch_attempts <= 2, this class's __load_referrer helper
            is called with (base_url, row). A failure there is logged, the
            session is rolled back, and the failed attempt is then recorded
            in local_fetch_attempts and committed.

        The batch is first attempted as a single transaction. If SQLAlchemy
        raises InvalidRequestError, OperationalError or IntegrityError, the
        session is rolled back and the whole batch is attempted again,
        committing after each individual insert/date fix. The loop only
        exits once a pass ends with a successful commit.

        Args:
            base_url: passed through unchanged to __load_referrer.
            link_items: sized iterable of dicts shaped like the example
                below, plus a 'release_date' entry.

        Example release sections:
        {
            'seriesname': 'Gate of Revelation',
            'releaseinfo': 'c203',
            'groupinfo': 'daoseekerblog',
            'referrer': 'https://www.novelupdates.com/',
            'outbound_wrapper': 'https://www.novelupdates.com/extnu/327682/',
            'actual_target': None
        }, {
            'seriesname': 'Mai Kitsune Waifu',
            'releaseinfo': 'c174',
            'groupinfo': 'subudai11',
            'referrer': 'https://www.novelupdates.com/',
            'outbound_wrapper': 'https://www.novelupdates.com/extnu/327678/',
            'actual_target': None
        }
        '''

        commit_each = False
        while True:
            try:
                # Restarted on every attempt, so the final log line reflects
                # only the pass that actually succeeded.
                new_count = 0
                for item in link_items:
                    have = self.db_sess.query(db.NuReleaseItem) \
                        .filter(db.NuReleaseItem.outbound_wrapper == item['outbound_wrapper']) \
                        .scalar()

                    if not have:
                        self.log.info("New: '%s' -> '%s' : '%s'",
                                      item['seriesname'], item['releaseinfo'],
                                      item['groupinfo'])
                        have = db.NuReleaseItem(
                            validated=False,
                            reviewed='unverified',
                            seriesname=item['seriesname'].strip(),
                            releaseinfo=item['releaseinfo'].strip(),
                            groupinfo=item['groupinfo'].strip(),
                            referrer=item['referrer'],
                            outbound_wrapper=item['outbound_wrapper'],
                            first_seen=datetime.datetime.now(),
                            release_date=item['release_date'],
                            fetch_attempts=0,
                            local_fetch_attempts=0,
                        )
                        self.db_sess.add(have)
                        new_count += 1
                        if commit_each:
                            self.db_sess.commit()

                        self.mon_con.incr('new-urls', 1)
                    else:
                        # NOTE(review): this subtraction raises TypeError if
                        # either date is None - confirm release_date is
                        # never NULL on existing rows.
                        delta = have.release_date - item['release_date']
                        # `days` is defined elsewhere; presumably it converts
                        # a day count to seconds - confirm.
                        if delta.total_seconds() > days(2):
                            self.log.info(
                                "Item release date looks invalid. Fixing (%s, %s)",
                                delta, delta.total_seconds())
                            have.release_date = item['release_date']
                            if commit_each:
                                self.db_sess.commit()

                    # This shouldn't happen given the default value, but it is. Not sure how.
                    # Only the errors int() raises for a missing/malformed
                    # value are handled; anything else (including
                    # KeyboardInterrupt) is allowed to propagate.
                    try:
                        int(have.local_fetch_attempts)
                    except (TypeError, ValueError):
                        have.local_fetch_attempts = 0

                    needs_resolve = (have.reviewed == 'unverified'
                                     and have.fetch_attempts == 0
                                     and have.local_fetch_attempts <= 2)
                    if not needs_resolve:
                        continue

                    try:
                        self.__load_referrer(base_url, have)
                    except Exception:
                        self.log.info("Failure resolving item for '%s'",
                                      have.outbound_wrapper)
                        self.log.info("TL Group: %s. Series %s, chap: %s",
                                      have.groupinfo, have.seriesname,
                                      have.releaseinfo)
                        for line in traceback.format_exc().strip().split("\n"):
                            self.log.error("%s", line.rstrip())

                        try:
                            self.log.warning("Rolling back")
                            self.db_sess.rollback()
                        except Exception:
                            self.log.critical(
                                "Failure in rollback for '%s'",
                                have.outbound_wrapper)
                            for line in traceback.format_exc().strip().split("\n"):
                                self.log.error("%s", line.rstrip())
                        else:
                            # Record the failed attempt only AFTER the
                            # rollback. Session.rollback() throws away
                            # uncommitted attribute changes, so incrementing
                            # first meant the counter never advanced and the
                            # `<= 2` limit above could not take effect.
                            # The value is re-read here (it may come back as
                            # None after the rollback, hence `or 0`) and
                            # committed immediately so a later rollback in
                            # this batch cannot discard it either.
                            # NOTE(review): the rollback also discards any
                            # rows added earlier in this pass that were not
                            # yet committed (commit_each False), including
                            # `have` itself if it was new - in that case this
                            # update is in-memory only. Confirm whether such
                            # rows should be re-added.
                            have.local_fetch_attempts = (have.local_fetch_attempts or 0) + 1
                            self.db_sess.commit()

                self.db_sess.commit()
                break

            except (sqlalchemy.exc.InvalidRequestError,
                    sqlalchemy.exc.OperationalError,
                    sqlalchemy.exc.IntegrityError):
                # The first failure is only a warning (a retry follows);
                # failures while already committing per item are errors and
                # get the full traceback.
                lfunc = self.log.error if commit_each else self.log.warning

                lfunc("Error when inserting items!")
                if commit_each:
                    for line in traceback.format_exc().strip().split("\n"):
                        lfunc("%s", line.rstrip())
                else:
                    lfunc("Retrying with commit_each")
                self.db_sess.rollback()
                commit_each = True

        self.log.info("Found %s release links on page, %s of which were new!",
                      len(link_items), new_count)