Example #1
	def write( self, output ):
		"""Writes the catalogue to the given output, this triggers a walk
		of the catalogue."""
		for i, t, p in self.walk():
			assert t in TYPES
			try:
				line = bytes("{0}{3}{1}{3}{2}{4}".format(i,t,p, self.FIELD_SEPARATOR, self.LINE_SEPARATOR), "utf8")
				output.write(line)
			except UnicodeEncodeError as e:
				logging.error("Catalogue: exception occured {0}".format(e))
Example #2
    def write(self, output):
        """Writes the catalogue to the given output, this triggers a walk
		of the catalogue."""
        for i, t, p in self.walk():
            assert t in TYPES
            try:
                line = bytes(
                    "{0}{3}{1}{3}{2}{4}".format(i, t, p, self.FIELD_SEPARATOR,
                                                self.LINE_SEPARATOR), "utf8")
                output.write(line)
            except UnicodeEncodeError as e:
                logging.error("Catalogue: exception occured {0}".format(e))
Example #3
 def Groups(cls, lines):
     """Creates groups out of the lines generated by `ParseLines`"""
     mode = None
     current = root = Group(type=TYPE_FILE)
     result = [current]
     lines = list(lines)
     for i, t, l in lines:
         if t == TYPE_SYMBOL:
             current = Group(
                 type=TYPE_SYMBOL,
                 classifier=l.strip(),
                 start=i,
             )
             result.append(current)
         elif t == TYPE_DOC:
             if not current.code:
                 current.doc.append(l)
             else:
                 current = Group(
                     type=TYPE_FILE,
                     doc=[l],
                     start=i,
                 )
                 result.append(current)
         elif t == TYPE_CODE:
             current.code.append(l)
         else:
             assert False, "Unexpected line type: {0}".format(t)
     # Now we post_process groups
     r = []
     for i, group in enumerate(result):
         if group.type == TYPE_SYMBOL:
             first_line = None
             try:
                 first_line = next(_ for _ in group.code if _)
             except StopIteration:
                 reporter.error("Group has no code: {0}".format(group))
             if first_line:
                 match = SYMBOL_EXTRACTORS[group.classifier].match(
                     first_line)
                 assert match, "Symbol extractor {0} cannot match {1}".format(
                     group.classifier, first_line)
                 group.name = match.groups()[-1]
                 root.symbols[group.name] = group
                 r.append(group)
             else:
                 reporter.warn(
                     "Filtered out empty group: {0} at {1}".format(
                         group, lines[group.start]))
     return r
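
A minimal stand-in for the `Group` container this classmethod relies on; the attribute names come from the code above, while the defaults and docstring are assumptions.

class Group:
    """Sketch of the group record used above; defaults are assumed."""
    def __init__(self, type=None, classifier=None, doc=None, code=None, start=None):
        self.type = type              # TYPE_FILE or TYPE_SYMBOL
        self.classifier = classifier  # key into SYMBOL_EXTRACTORS for symbol groups
        self.doc = doc if doc is not None else []     # documentation lines
        self.code = code if code is not None else []  # code lines
        self.start = start            # index of the line that opened the group
        self.name = None              # filled in during post-processing
        self.symbols = {}             # name -> Group, used on the root group
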
Example #4
	def Groups( cls, lines ):
		"""Creates groups out of the lines generated by `ParseLines`"""
		mode    = None
		current = root = Group(type=TYPE_FILE)
		result = [current]
		lines  = list(lines)
		for i,t,l in lines:
			if   t == TYPE_SYMBOL:
				current = Group(
					type       = TYPE_SYMBOL,
					classifier = l.strip(),
					start      = i,
				)
				result.append(current)
			elif t == TYPE_DOC:
				if not current.code:
					current.doc.append(l)
				else:
					current  = Group(
						type = TYPE_FILE,
						doc  = [l],
						start = i,
					)
					result.append(current)
			elif t == TYPE_CODE:
				current.code.append(l)
			else:
				assert False, "Unexpected line type: {0}".format(t)
		# Now we post_process groups
		r = []
		for i,group in enumerate(result):
			if group.type == TYPE_SYMBOL:
				first_line = None
				try:
					first_line  = next(_ for _ in group.code if _)
				except StopIteration:
					reporter.error("Group has no code: {0}".format(group))
				if first_line:
					match       = SYMBOL_EXTRACTORS[group.classifier].match(first_line)
					assert match, "Symbol extractor {0} cannot match {1}".format(group.classifier, first_line)
					group.name = match.groups()[-1]
					root.symbols[group.name] = group
					r.append(group)
				else:
					reporter.warn("Filtered out empty group: {0} at {1}".format(group, lines[group.start]))
		return r
Example #5
def run(args):
    sources = [os.path.abspath(_) for _ in args.source]
    base = os.path.commonprefix(sources)
    if not os.path.exists(base) or not os.path.isdir(base):
        base = os.path.dirname(base)
    for s in sources:
        if not os.path.exists(s):
            logging.error("Source path does not exists: {0}".format(s))
            return None
    # We setup the filter
    node_filter = Filter(types=args.type, names=args.name)
    # We log the information about the sources
    logging.info("Using base: {0}".format(base))
    for _ in sources:
        logging.info("Using source: {0}".format(_))
    # Sometimes the sources have a common filename prefix, so make sure it is
    # a directory or we get its dirname
    # We need either a catalogue path or an output directory
    if not (args.catalogue or args.output):
        logging.error("Either catalogue or output directory are required")
        return -1
    # Now we retrieve/create the catalogue
    cat_path = args.catalogue or os.path.join(args.output, "__rawcopy__",
                                              "catalogue.lst")
    if not os.path.exists(cat_path):
        logging.info("Creating source catalogue at {0}".format(cat_path))
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    elif args.catalogue_only:
        logging.info("Catalogue-only mode, regenerating the catalogue")
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    # Now we iterate over the catalogue
    if args.catalogue_only:
        logging.info(
            "Catalogue-only mode, skipping copy. Remove -C option to do the actual copy"
        )
    elif args.list:
        # FIXME: Use a copy with no action
        c = Copy(args.output, node_filter)
        r = args.range
        c.fromCatalogue(cat_path,
                        range=r,
                        test=True,
                        callback=lambda i, t, p, s, d: sys.stdout.write(
                            "{0}\t{1}\t{2}\t{3}\t{4}\n".format(i, t, p, s, d)))
    elif args.output:
        logging.info("Copy catalogue's contents to {0}".format(args.output))
        c = Copy(args.output, node_filter)
        r = args.range
        if r:
            try:
                r = [int(_ or -1) for _ in r.split("-")]
            except ValueError as e:
                logging.error("Unsupported range format. Expects `start-end`")
                return -1
            logging.info("Using catalogue item range: {0}".format(r))
        if args.test:
            logging.info("Test mode enabled (not actual file copy)".format(r))
        c.fromCatalogue(cat_path, range=r, test=args.test)
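
A hypothetical argparse setup that would produce the `args` attributes used above. Only the `-C` spelling for catalogue-only mode is hinted at by the log message; every other flag name here is an assumption.

import argparse

# Hypothetical parser; the real rawcopy CLI may spell its options differently.
parser = argparse.ArgumentParser(prog="rawcopy")
parser.add_argument("source", nargs="+", help="source paths to copy")
parser.add_argument("-c", "--catalogue", help="path to the catalogue file")
parser.add_argument("-C", "--catalogue-only", dest="catalogue_only", action="store_true",
                    help="only (re)generate the catalogue, skip the copy")
parser.add_argument("-o", "--output", help="output directory")
parser.add_argument("-l", "--list", action="store_true", help="list catalogue entries")
parser.add_argument("-r", "--range", help="catalogue item range, `start-end`")
parser.add_argument("-t", "--type", action="append", help="filter by node type")
parser.add_argument("-n", "--name", action="append", help="filter by node name")
parser.add_argument("--test", action="store_true", help="dry run, no actual copy")

args = parser.parse_args(["some-dir", "-o", "/tmp/backup"])
# run(args) would then build/read the catalogue and perform the copy.
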
Example #6
def run( args ):
	sources = [os.path.abspath(_) for _ in args.source]
	base    = os.path.commonprefix(sources)
	if not os.path.exists(base) or not os.path.isdir(base): base = os.path.dirname(base)
	for s in sources:
		if not os.path.exists(s):
			logging.error("Source path does not exists: {0}".format(s))
			return None
	# We setup the filter
	node_filter = Filter(types=args.type, names=args.name)
	# We log the information about the sources
	logging.info("Using base: {0}".format(base))
	for _ in sources: logging.info("Using source: {0}".format(_))
	# Sometimes the sources have a common filename prefix, so make sure it is
	# a directory or we get its dirname
	# We need either a catalogue path or an output directory
	if not (args.catalogue or args.output):
		logging.error("Either catalogue or output directory are required")
		return -1
	# Now we retrieve/create the catalogue
	cat_path = args.catalogue or os.path.join(args.output, "__rawcopy__", "catalogue.lst")
	if not os.path.exists(cat_path):
		logging.info("Creating source catalogue at {0}".format(cat_path))
		c = Catalogue(sources, base, node_filter)
		c.save(cat_path)
	elif args.catalogue_only:
		logging.info("Catalogue-only mode, regenerating the catalogue")
		c = Catalogue(sources, base, node_filter)
		c.save(cat_path)
	# Now we iterate over the catalogue
	if args.catalogue_only:
		logging.info("Catalogue-only mode, skipping copy. Remove -C option to do the actual copy")
	elif args.list:
		# FIXME: Use a copy with no action
		c = Copy(args.output, node_filter)
		r = args.range
		c.fromCatalogue(cat_path, range=r, test=True, callback=lambda i,t,p,s,d:sys.stdout.write("{0}\t{1}\t{2}\t{3}\t{4}\n".format(i,t,p,s,d)))
	elif args.output:
		logging.info("Copy catalogue's contents to {0}".format(args.output))
		c = Copy(args.output, node_filter)
		r = args.range
		if r:
			try:
				r = [int(_ or -1) for _ in r.split("-")]
			except ValueError as e:
				logging.error("Unsupported range format. Expects `start-end`")
				return -1
			logging.info("Using catalogue item range: {0}".format(r))
		if args.test:
			logging.info("Test mode enabled (not actual file copy)".format(r))
		c.fromCatalogue(cat_path, range=r, test=args.test)
Example #7
    def fromCatalogue(self, path, range=None, test=False, callback=None):
        """Reads the given catalogue and copies directories, symlinks and files
        listed in the catalogue. Note that this expects the catalogue to
        be in traversal order."""
        logging.info("Opening catalogue: {0}".format(path))
        # The base is the common prefix/ancestor of all the paths in the
        # catalogue. The root changes but will always start with the base.
        base = None
        root = None
        self.test = test
        # When no range is specified, we look for the index path
        # and load it.
        if range is None and os.path.exists(
                self._indexPath) and os.stat(path)[stat.ST_MTIME] <= os.stat(
                    self._indexPath)[stat.ST_MTIME]:
            with open(self._indexPath, "r") as f:
                r = f.read()
            try:
                r = int(r)
                range = (r, -1)
            except ValueError as e:
                pass
        with open(path, "r") as f:
            for line in f:
                j_t_p = line.split(Catalogue.FIELD_SEPARATOR, 2)
                if len(j_t_p) != 3:
                    logging.error(
                        "Malformed line, expecting at least 3 colon-separated values: {0}"
                        .format(repr(line)))
                    continue
                j, t, p = j_t_p
                p = p[:-1]
                i = int(j)
                self.last = i
                if t == TYPE_BASE:
                    # The first line of the catalogue is expected to be the base
                    # it is also expected to be absolute.
                    self.base = base = p
                    assert os.path.exists(
                        p), "Base directory does not exists: {0}".format(
                            utf8(p))
                    # Once we have the base, we can create rawcopy's DB files
                    rd = os.path.join(self.output, "__rawcopy__")
                    if not os.path.exists(rd):
                        logging.info(
                            "Creating rawcopy database directory {0}".format(
                                utf8(rd)))
                        os.makedirs(rd)
                    self._open(os.path.join(rd, "copy.db"))
                elif t == TYPE_ROOT:
                    # If we found a root, we ensure that it is prefixed with the
                    # base
                    assert base, "Catalogue must have a base directory before having roots"
                    assert os.path.normpath(p).startswith(
                        os.path.normpath(base)
                    ), "Catalogue roots must be prefixed by the base, base={0}, root={1}".format(
                        utf8(base), utf8(p))
                    # Now we extract the suffix, which is the root minus the base
                    # and no leading /
                    self.root = root = p
                    source = p
                    suffix = p[len(self.base):]
                    if suffix and suffix[0] == "/": suffix = suffix[1:]
                    destination = os.path.join(
                        os.path.join(self.output, suffix))
                    if not (os.path.exists(destination)
                            and not os.path.islink(destination)):
                        pd = os.path.dirname(destination)
                        logging.info("Creating root: {0}:{1}".format(
                            i, utf8(p)))
                        # We make sure the source exists
                        if not os.path.exists(source) and not os.path.islink(
                                source):
                            logging.info(
                                "Root does not exists: {0}:{1}".format(
                                    i, utf8(p)))
                        # TODO: How do we handle filters at this stage?
                        # We make sure the parent destination exists (it should be the case)
                        if not os.path.exists(pd):
                            # We copy the original parent directory
                            self.copydir(p, pd, suffix)
                        if os.path.isdir(source):
                            self.copydir(p, destination, suffix)
                        elif os.path.islink(source):
                            self.copylink(p, destination, suffix)
                        elif os.path.isfile(source):
                            self.copyfile(p, destination, suffix)
                        else:
                            logging.error(
                                "Unsupported root (not a dir/link/file): {0}:{1}"
                                .format(i, utf8(p)))
                else:
                    # We skip the indexes that are not within the range, if given
                    if range:
                        if i < range[0]: continue
                        if len(range) > 1 and range[1] >= 0 and i > range[1]:
                            logging.info(
                                "Reached end of range {0} >= {1}".format(
                                    i, range[1]))
                            break
                    # We check if the filter matches
                    if not self.match(p, t):
                        continue
                    assert root and self.output
                    # We prepare the source, suffix and destination
                    source = os.path.join(root, p)
                    assert source.startswith(
                        base
                    ), "os.path.join(root={0}, path={1}) expected to start with base={2}".format(
                        repr(root), repr(p), repr(base))
                    suffix = source[len(base):]
                    if suffix[0] == "/": suffix = suffix[1:]
                    destination = os.path.join(
                        os.path.join(self.output, suffix))
                    assert suffix, "Empty suffix: source={0}, path={1}, destination={2}".format(
                        utf8(source), utf8(p), utf8(destination))
                    # We now proceed with the actual copy
                    if not (os.path.exists(source) or os.path.islink(source)):
                        logging.error(
                            "Source path not available: {0}:{1}".format(
                                i, utf8(source)))
                    elif not (os.path.exists(destination)
                              or os.path.islink(destination)):
                        logging.info("Copying path [{2}] {0}:{1}".format(
                            i, utf8(p), t))
                        if t == TYPE_DIR or os.path.isdir(source):
                            if t != TYPE_DIR:
                                logging.warn(
                                    "Source detected as directory, but typed as {0} -- {1}:{2}"
                                    .format(t, i, utf8(p)))
                            self.copydir(source, destination, p)
                        elif t == TYPE_SYMLINK:
                            self.copylink(source, destination, p)
                        elif t == TYPE_FILE:
                            self.copyfile(source, destination, p)
                        else:
                            logging.error(
                                "Copy: line {0} unsupported type {1}".format(
                                    i, t, p))
                    elif not self.test:
                        # We only go there if we're not in test mode
                        if t == TYPE_DIR:
                            logging.info(
                                "Skipping already copied directory: {0}:{1}".
                                format(i, utf8(destination)))
                        elif t == TYPE_SYMLINK:
                            logging.info(
                                "Skipping already copied link: {0}:{1}".format(
                                    i, utf8(destination)))
                        elif t == TYPE_FILE:
                            logging.info(
                                "Skipping already copied file: {0}:{1}".format(
                                    i, utf8(destination)))
                        # TODO: We should repair a damaged DB and make sure the inode is copied
                        self.ensureInodePath(source, suffix)
                    # We call the callback
                    if callback:
                        callback(i, t, p, source, destination)
                # We sync the database every 1000 items
                if j.endswith("000") and (not range or i >= range[0]):
                    logging.info("{0} items processed, syncing db".format(i))
                    self._sync(j)
        # We don't forget to close the DB
        self._close()
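
For context, a sketch of the catalogue layout this method expects: colon-separated `index:type:path` lines in traversal order, starting with the base, then each root (which must be prefixed by the base), then entries that are joined onto the current root. The single-letter type codes are placeholders for the TYPE_* constants.

# Hypothetical catalogue contents; "B", "R", "D" and "F" stand in for the
# TYPE_BASE, TYPE_ROOT, TYPE_DIR and TYPE_FILE constants.
CATALOGUE = (
    "0:B:/home/user\n"           # base: absolute common ancestor of all paths
    "1:R:/home/user/projects\n"  # root: always starts with the base
    "2:D:demo\n"                 # entries are joined onto the current root
    "3:F:demo/setup.py\n"
)

for line in CATALOGUE.splitlines(True):
    j, t, p = line.split(":", 2)
    print(int(j), t, p[:-1])     # p[:-1] strips the trailing newline, as above
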
Example #8
import time

from instapy import InstaPy, smart_run

import reporter

# `patch` and `logger` are assumed to be provided elsewhere in the original script.
patch.apply()
reporter.set_version("like-ff-2.1-try")  # set a version tag

SLEEP_BETWEEN_EACH_LIKE = 20

session = InstaPy(bypass_suspicious_attempt=True,
                  headless_browser=True,
                  use_firefox=True,
                  **reporter.Arguments().all())

with smart_run(session):
    while True:
        cur = time.time()
        try:
            time.sleep(SLEEP_BETWEEN_EACH_LIKE)
            session.like_by_tags(['love'], amount=1, interact=False)

            time.sleep(SLEEP_BETWEEN_EACH_LIKE)
            session.like_by_tags(['instagood'], amount=1, interact=False)

            time.sleep(SLEEP_BETWEEN_EACH_LIKE)
            session.like_by_tags(['photooftheday'], amount=1, interact=False)

            time.sleep(SLEEP_BETWEEN_EACH_LIKE)
            session.like_by_tags(['fashion'], amount=1, interact=False)
            logger.warning(time.time() - cur)
        except Exception as e:
            reporter.error(e)
Example #9
def run( app=None, components=(), method=STANDALONE, name="retro",
root = ".", resetlog=False, address="", port=None, prefix='', asynchronous=False,
sessions=False, withReactor=None, processStack=lambda x:x, runCondition=lambda:True,
onError=None ):
	"""Runs this web application with the given method (easiest one is STANDALONE),
	with the given root (directory from where the web app-related resource
	will be resolved).

	This function is the 'main' for your web application, so this is basically
	the last call you should have in your web application main."""
	if app is None:
		app = Application(prefix=prefix,components=components)
	else:
		for _ in components: app.register(_)
	# We set up the configuration if necessary
	config = app.config()
	if not config: config = Configuration(CONFIG)
	# Adjusts the working directory to basepath
	root = os.path.abspath(root)
	if os.path.isfile(root): root = os.path.dirname(root)
	# We set the application root to the given root, and do a chdir
	os.chdir(root)
	config.setdefault("root",    root)
	config.setdefault("name",    name)
	config.setdefault("logfile", name + ".log")
	if resetlog: os.unlink(config.logfile())
	# We set the configuration
	app.config(config)
	# And start the application
	app.start()
	# NOTE: Maybe we should always print it
	#print app.config()
	# We start the WSGI stack
	stack = app._dispatcher
	stack = processStack(stack)
	# == FCGI (Flup-provided)
	#
	if method == FCGI:
		if not has(FLUP):
			raise ImportError("Flup is required to run FCGI")
		fcgi_address = address or config.get("address")
		fcgi_port    = port or config.get("port")
		if fcgi_port and fcgi_address:
			server = FLUP_FCGIServer(stack, bindAddress=(fcgi_address, fcgi_port))
		elif fcgi_address:
			server = FLUP_FCGIServer(stack, bindAddress=fcgi_address)
		else:
			server = FLUP_FCGIServer(stack)
		server.run()
	#
	# == SCGI (Flup-provided)
	#
	elif method == SCGI:
		if not has(FLUP):
			raise ImportError("Flup is required to run SCGI")
		fcgi_address = address or config.get("address")
		fcgi_port    = port or config.get("port")
		if fcgi_port and fcgi_address:
			server = FLUP_SCGIServer(stack, bindAddress=(fcgi_address, fcgi_port))
		elif fcgi_address:
			server = FLUP_SCGIServer(stack, bindAddress=fcgi_address)
		else:
			server = FLUP_SCGIServer(stack)
		server.run()
	#
	# == CGI
	#
	elif method == CGI:
		environ         = {} ; environ.update(os.environ)
		# From <http://www.python.org/dev/peps/pep-0333/#the-server-gateway-side>
		environ['wsgi.input']        = sys.stdin
		environ['wsgi.errors']       = sys.stderr
		environ['wsgi.version']      = (1,0)
		environ['wsgi.multithread']  = False
		environ['wsgi.multiprocess'] = True
		environ['wsgi.run_once']     = True
		if environ.get('HTTPS','off') in ('on','1'):
			environ['wsgi.url_scheme'] = 'https'
		else:
			environ['wsgi.url_scheme'] = 'http'
		# FIXME: Don't know if it's the proper solution
		req_uri = environ["REQUEST_URI"]
		script_name = environ["SCRIPT_NAME"]
		if req_uri.startswith(script_name):
			environ["PATH_INFO"]  = req_uri[len(script_name):]
		else:
			environ["PATH_INFO"]  = "/"
		if sessions:
			environ["com.saddi.service.session"] = sessions
		def start_response( status, headers, executionInfo=None ):
			for key, value in headers:
				print ("%s: %s" % (key, value))
			print ()
		# FIXME: This was broken (`self` is not defined here); dispatch through the WSGI stack
		res = "".join(tuple(stack(environ, start_response)))
		print (res)
		if sessions:
			sessions.close()
	#
	# == GEVENT, BJOERN, ROCKET & WSGI
	#
	elif method in (GEVENT, BJOERN, ROCKET, WSGI):

		host   = config.get("host")
		port   = config.get("port")
		try:
			import reporter as logging
		except ImportError:
			import logging
		def application(environ, startResponse):
			# Gevent needs a wrapper
			if "retro.app" not in environ: environ["retro.app"] = stack.app()
			return environ["retro.app"](environ, startResponse)
		def logged_application(environ, startResponse):
			logging.info("{0} {1}".format(environ["REQUEST_METHOD"], environ["PATH_INFO"]))
			if "retro.app" not in environ: environ["retro.app"] = stack.app()
			return environ["retro.app"](environ, startResponse)
		if method == "GEVENT":
			try:
				from gevent import wsgi
			except ImportError:
				raise ImportError("gevent is required to run `gevent` method")
			# NOTE: This starts using gevent's WSGI server (faster!)
			wsgi.WSGIServer((host,port), application, spawn=None).serve_forever()
		elif method == BJOERN:
			try:
				import bjoern
			except ImportError:
				raise ImportError("bjoern is required to run `bjoern` method")
			bjoern.run(logged_application, host, port)
		elif method == ROCKET:
			try:
				import rocket
			except ImportError:
				raise ImportError("rocket is required to run `rocket` method")
			rocket.Rocket((host, int(port)), "wsgi", {"wsgi_app":application}).start()
		elif method == WSGI:
			# When using standalone WSGI, we make sure to wrap RendezVous objects
			# that might be returned by the handlers, and make sure we wait for
			# them -- we could use a callback version instead for specific web
			# servers.
			def retro_rendezvous_wrapper( environ, start_response, request=None):
				results = stack(environ, start_response, request)
				for result in results:
					if isinstance(result, RendezVous):
						result.wait()
						continue
					yield result
			retro_rendezvous_wrapper.stack = stack
			return retro_rendezvous_wrapper
	# == STANDALONE (WSGIREF)
	#
	# elif method == STANDALONE_WSGIREF:
	# 	server_address     = (
	# 		address or app.config("address") or DEFAULT_ADDRESS,
	# 		port or app.config("port") or DEFAULT_PORT
	# 	)
	# 	server = WSGIServer(server_address, WSGIRequestHandler)
	# 	server.set_app(stack)
	# 	socket = server.socket.getsockname()
	# 	print "WSGIREF server listening on %s:%s" % ( socket[0], socket[1])
	# 	try:
	# 		while runCondition: server.handle_request()
	# 	except KeyboardInterrupt:
	# 		print "done"
	#
	# == STANDALONE (Retro WSGI server)
	#
	elif method in (STANDALONE, AIO):
		try:
			import reporter as logging
		except ImportError:
			import logging
		server_address     = (
			address or app.config("address") or DEFAULT_ADDRESS,
			int(port or app.config("port") or DEFAULT_PORT)
		)
		stack.fromRetro = True
		stack.app       = lambda: app
		if method == STANDALONE and not asynchronous:
			import retro.wsgi
			try:
				server   = retro.wsgi.WSGIServer(server_address, stack)
				retro.wsgi.onError(onError)
				socket = server.socket.getsockname()
				print ("Retro embedded server listening on %s:%s" % ( socket[0], socket[1]))
			except Exception as e:
				logging.error("Retro: Cannot bind to {0}:{1}, error: {2}".format(server_address[0], server_address[1], e))
				return -1
			# TODO: Support runCondition
			try:
				while runCondition():
					server.handle_request()
			except KeyboardInterrupt:
				print ("done")
		else:
			import retro.aio
			import asyncio
			retro.aio.run(app, server_address[0], server_address[1])
			# TODO: Support runCondition
	else:
		raise Exception("Unknown retro setup method:" + method)
Example #10
	def fromCatalogue( self, path, range=None, test=False, callback=None ):
		"""Reads the given catalogue and copies directories, symlinks and files
		listed in the catalogue. Note that this expects the catalogue to
		be in traversal order."""
		logging.info("Opening catalogue: {0}".format(path))
		# The base is the common prefix/ancestor of all the paths in the
		# catalogue. The root changes but will always start with the base.
		base      = None
		root      = None
		self.test = test
		# When no range is specified, we look for the index path
		# and load it.
		if range is None and os.path.exists(self._indexPath) and os.stat(path)[stat.ST_MTIME] <= os.stat(self._indexPath)[stat.ST_MTIME]:
			with open(self._indexPath, "r") as f:
				r = f.read()
			try:
				r = int(r)
				range = (r,-1)
			except ValueError as e:
				pass
		with open(path, "r") as f:
			for line in f:
				j_t_p     = line.split(Catalogue.FIELD_SEPARATOR, 2)
				if len(j_t_p) != 3:
					logging.error("Malformed line, expecting at least 3 colon-separated values: {0}".format(repr(line)))
					continue
				j, t, p   =  j_t_p
				p = p[:-1]
				i         = int(j) ; self.last = i
				if t == TYPE_BASE:
					# The first line of the catalogue is expected to be the base
					# it is also expected to be absolute.
					self.base = base = p
					assert os.path.exists(p), "Base directory does not exists: {0}".format(utf8(p))
					# Once we have the base, we can create rawcopy's DB files
					rd = os.path.join(self.output, "__rawcopy__")
					if not os.path.exists(rd):
						logging.info("Creating rawcopy database directory {0}".format(utf8(rd)))
						os.makedirs(rd)
					self._open(os.path.join(rd, "copy.db"))
				elif t == TYPE_ROOT:
					# If we found a root, we ensure that it is prefixed with the
					# base
					assert base, "Catalogue must have a base directory before having roots"
					assert os.path.normpath(p).startswith(os.path.normpath(base)), "Catalogue roots must be prefixed by the base, base={0}, root={1}".format(utf8(base), utf8(p))
					# Now we extract the suffix, which is the root minus the base
					# and no leading /
					self.root = root = p
					source    = p
					suffix    = p[len(self.base):]
					if suffix and suffix[0] == "/": suffix = suffix[1:]
					destination = os.path.join(os.path.join(self.output, suffix))
					if not (os.path.exists(destination) and not os.path.islink(destination)):
						pd = os.path.dirname(destination)
						logging.info("Creating root: {0}:{1}".format(i, utf8(p)))
						# We make sure the source exists
						if not os.path.exists(source) and not os.path.islink(source):
							logging.info("Root does not exists: {0}:{1}".format(i, utf8(p)))
						# TODO: How do we handle filters at this stage?
						# We make sure the parent destination exists (it should be the case)
						if not os.path.exists(pd):
							# We copy the original parent directory
							self.copydir(p, pd, suffix)
						if os.path.isdir(source):
							self.copydir(p, destination, suffix)
						elif os.path.islink(source):
							self.copylink(p, destination, suffix)
						elif os.path.isfile(source):
							self.copyfile(p, destination, suffix)
						else:
							logging.error("Unsupported root (not a dir/link/file): {0}:{1}".format(i, utf8(p)))
				else:
					# We skip the indexes that are not within the range, if given
					if range:
						if i < range[0]: continue
						if len(range) > 1 and range[1] >= 0 and i > range[1]:
							logging.info("Reached end of range {0} >= {1}".format(i, range[1]))
							break
					# We check if the filter matches
					if not self.match(p, t):
						continue
					assert root and self.output
					# We prepare the source, suffix and destination
					source = os.path.join(root, p)
					assert source.startswith(base), "os.path.join(root={0}, path={1}) expected to start with base={2}".format(repr(root), repr(p), repr(base))
					suffix = source[len(base):]
					if suffix[0] == "/": suffix = suffix[1:]
					destination = os.path.join(os.path.join(self.output, suffix))
					assert suffix, "Empty suffix: source={0}, path={1}, destination={2}".format(utf8(source), utf(p), utf8(destination))
					# We now proceed with the actual copy
					if not (os.path.exists(source) or os.path.islink(source)):
						logging.error("Source path not available: {0}:{1}".format(i,utf8(source)))
					elif not (os.path.exists(destination) or os.path.islink(destination)):
						logging.info("Copying path [{2}] {0}:{1}".format(i,utf8(p),t))
						if t == TYPE_DIR or os.path.isdir(source):
							if t != TYPE_DIR: logging.warn("Source detected as directory, but typed as {0} -- {1}:{2}".format(t, i, utf8(p)))
							self.copydir(source, destination, p)
						elif t == TYPE_SYMLINK:
							self.copylink(source, destination, p)
						elif t == TYPE_FILE:
							self.copyfile(source, destination, p)
						else:
							logging.error("Copy: line {0} unsupported type {1}".format(i, t, p))
					elif not self.test:
						# We only go there if we're not in test mode
						if t == TYPE_DIR:
							logging.info("Skipping already copied directory: {0}:{1}".format(i, utf8(destination)))
						elif t == TYPE_SYMLINK:
							logging.info("Skipping already copied link: {0}:{1}".format(i, utf8(destination)))
						elif t == TYPE_FILE:
							logging.info("Skipping already copied file: {0}:{1}".format(i, utf8(destination)))
						# TODO: We should repair a damaged DB and make sure the inode is copied
						self.ensureInodePath(source, suffix)
					# We call the callback
					if callback:
						callback(i, t, p, source, destination)
				# We sync the database every 1000 items
				if j.endswith("000") and (not range or i>=range[0]):
					logging.info("{0} items processed, syncing db".format(i))
					self._sync(j)
		# We don't forget to close the DB
		self._close()