def write(self, output):
    """Writes the catalogue to the given output; this triggers a walk of
    the catalogue.

    Each entry is emitted as `index<FS>type<FS>path<LS>` encoded as UTF-8
    bytes, where FS/LS are the class-level field/line separators."""
    for i, t, p in self.walk():
        assert t in TYPES
        try:
            line = bytes(
                "{0}{3}{1}{3}{2}{4}".format(
                    i, t, p, self.FIELD_SEPARATOR, self.LINE_SEPARATOR),
                "utf8")
            output.write(line)
        except UnicodeEncodeError as e:
            # FIX: corrected "occured" -> "occurred" in the log message.
            # Entries that cannot be encoded are skipped (best-effort write).
            logging.error("Catalogue: exception occurred {0}".format(e))
def write(self, output):
    """Writes the catalogue to the given output; this triggers a walk of
    the catalogue.

    Each entry is emitted as `index<FS>type<FS>path<LS>` encoded as UTF-8
    bytes, where FS/LS are the class-level field/line separators."""
    for i, t, p in self.walk():
        assert t in TYPES
        try:
            line = bytes(
                "{0}{3}{1}{3}{2}{4}".format(
                    i, t, p, self.FIELD_SEPARATOR, self.LINE_SEPARATOR),
                "utf8")
            output.write(line)
        except UnicodeEncodeError as e:
            # FIX: corrected "occured" -> "occurred" in the log message.
            # Entries that cannot be encoded are skipped (best-effort write).
            logging.error("Catalogue: exception occurred {0}".format(e))
def Groups(cls, lines):
    """Creates groups out of the lines generated by `ParseLines`.

    Returns a list of non-empty `Group` objects; symbol groups are also
    registered in the root group's `symbols` map by extracted name."""
    # FIX: removed unused local `mode`.
    current = root = Group(type=TYPE_FILE)
    result = [current]
    lines = list(lines)
    for i, t, l in lines:
        if t == TYPE_SYMBOL:
            current = Group(
                type=TYPE_SYMBOL,
                classifier=l.strip(),
                start=i,
            )
            result.append(current)
        elif t == TYPE_DOC:
            if not current.code:
                # Doc lines before any code attach to the current group
                current.doc.append(l)
            else:
                # Doc after code starts a new file-level group
                current = Group(
                    type=TYPE_FILE,
                    doc=[l],
                    start=i,
                )
                result.append(current)
        elif t == TYPE_CODE:
            current.code.append(l)
        else:
            assert None
    # Now we post_process groups
    r = []
    for i, group in enumerate(result):
        if group.type == TYPE_SYMBOL:
            # FIX: `(gen).next()` is Python 2 only and raises AttributeError
            # on Python 3 — use the built-in next() with a default instead.
            first_line = next((_ for _ in group.code if _), None)
            if first_line is None:
                reporter.error("Group has no code: {0}".format(group))
            if first_line:
                match = SYMBOL_EXTRACTORS[group.classifier].match(first_line)
                assert match, "Symbol extractor {0} cannot match {1}".format(
                    group.classifier, first_line)
                group.name = match.groups()[-1]
                root.symbols[group.name] = group
            r.append(group)
        else:
            reporter.warn("Filtered out empty group: {0} at {1}".format(
                group, lines[group.start]))
    return r
def Groups(cls, lines):
    """Creates groups out of the lines generated by `ParseLines`.

    Returns a list of non-empty `Group` objects; symbol groups are also
    registered in the root group's `symbols` map by extracted name."""
    # FIX: removed unused local `mode`.
    current = root = Group(type=TYPE_FILE)
    result = [current]
    lines = list(lines)
    for i, t, l in lines:
        if t == TYPE_SYMBOL:
            current = Group(
                type=TYPE_SYMBOL,
                classifier=l.strip(),
                start=i,
            )
            result.append(current)
        elif t == TYPE_DOC:
            if not current.code:
                # Doc lines before any code attach to the current group
                current.doc.append(l)
            else:
                # Doc after code starts a new file-level group
                current = Group(
                    type=TYPE_FILE,
                    doc=[l],
                    start=i,
                )
                result.append(current)
        elif t == TYPE_CODE:
            current.code.append(l)
        else:
            assert None
    # Now we post_process groups
    r = []
    for i, group in enumerate(result):
        if group.type == TYPE_SYMBOL:
            # FIX: `(gen).next()` is Python 2 only and raises AttributeError
            # on Python 3 — use the built-in next() with a default instead.
            first_line = next((_ for _ in group.code if _), None)
            if first_line is None:
                reporter.error("Group has no code: {0}".format(group))
            if first_line:
                match = SYMBOL_EXTRACTORS[group.classifier].match(first_line)
                assert match, "Symbol extractor {0} cannot match {1}".format(
                    group.classifier, first_line)
                group.name = match.groups()[-1]
                root.symbols[group.name] = group
            r.append(group)
        else:
            reporter.warn("Filtered out empty group: {0} at {1}".format(
                group, lines[group.start]))
    return r
def run(args):
    """Entry point for the rawcopy command line: builds (or reuses) the
    catalogue for the given sources and then copies its contents to the
    output directory, honoring type/name filters and an optional item range.

    Returns None on a missing source, -1 on a usage error, otherwise falls
    through after the copy completes."""
    sources = [os.path.abspath(_) for _ in args.source]
    # Sometimes the sources have a common filename prefix, so make sure it is
    # a directory or we get its dirname
    base = os.path.commonprefix(sources)
    if not os.path.exists(base) or not os.path.isdir(base):
        base = os.path.dirname(base)
    for s in sources:
        if not os.path.exists(s):
            # FIX: grammar in error message ("does not exists")
            logging.error("Source path does not exist: {0}".format(s))
            return None
    # We setup the filter
    node_filter = Filter(types=args.type, names=args.name)
    # We log the information about the sources
    logging.info("Using base: {0}".format(base))
    for _ in sources:
        logging.info("Using source: {0}".format(_))
    # Now we create the catalogue
    if not (args.catalogue or args.output):
        logging.error("Either catalogue or output directory are required")
        return -1
    # Now we retrieve/create the catalogue
    cat_path = args.catalogue or os.path.join(args.output, "__rawcopy__",
                                              "catalogue.lst")
    if not os.path.exists(cat_path):
        logging.info("Creating source catalogue at {0}".format(cat_path))
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    elif args.catalogue_only:
        logging.info("Catalogue-only mode, regenerating the catalogue")
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    # Now we iterate over the catalogue
    if args.catalogue_only:
        logging.info("Catalogue-only mode, skipping copy. "
                     "Remove -C option to do the actual copy")
    elif args.list:
        # FIXME: Use a copy with no action
        c = Copy(args.output, node_filter)
        r = args.range
        c.fromCatalogue(
            cat_path, range=r, test=True,
            callback=lambda i, t, p, s, d: sys.stdout.write(
                "{0}\t{1}\t{2}\t{3}\t{4}\n".format(i, t, p, s, d)))
    elif args.output:
        logging.info("Copy catalogue's contents to {0}".format(args.output))
        c = Copy(args.output, node_filter)
        r = args.range
        if r:
            try:
                # "N-" means start at N with no upper bound (-1 sentinel)
                r = [int(_ or -1) for _ in r.split("-")]
            except ValueError:
                # FIX: the message was one literal broken across lines
                logging.error("Unsupported range format. Expects `start-end`")
                return -1
            logging.info("Using catalogue item range: {0}".format(r))
        if args.test:
            # FIX: dropped a no-op `.format(r)` on a message without
            # placeholders, and fixed the wording.
            logging.info("Test mode enabled (no actual file copy)")
        c.fromCatalogue(cat_path, range=r, test=args.test)
def run(args):
    """Entry point for the rawcopy command line: builds (or reuses) the
    catalogue for the given sources and then copies its contents to the
    output directory, honoring type/name filters and an optional item range.

    Returns None on a missing source, -1 on a usage error, otherwise falls
    through after the copy completes."""
    sources = [os.path.abspath(_) for _ in args.source]
    # Sometimes the sources have a common filename prefix, so make sure it is
    # a directory or we get its dirname
    base = os.path.commonprefix(sources)
    if not os.path.exists(base) or not os.path.isdir(base):
        base = os.path.dirname(base)
    for s in sources:
        if not os.path.exists(s):
            # FIX: grammar in error message ("does not exists")
            logging.error("Source path does not exist: {0}".format(s))
            return None
    # We setup the filter
    node_filter = Filter(types=args.type, names=args.name)
    # We log the information about the sources
    logging.info("Using base: {0}".format(base))
    for _ in sources:
        logging.info("Using source: {0}".format(_))
    # Now we create the catalogue
    if not (args.catalogue or args.output):
        logging.error("Either catalogue or output directory are required")
        return -1
    # Now we retrieve/create the catalogue
    cat_path = args.catalogue or os.path.join(args.output, "__rawcopy__",
                                              "catalogue.lst")
    if not os.path.exists(cat_path):
        logging.info("Creating source catalogue at {0}".format(cat_path))
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    elif args.catalogue_only:
        logging.info("Catalogue-only mode, regenerating the catalogue")
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    # Now we iterate over the catalogue
    if args.catalogue_only:
        logging.info("Catalogue-only mode, skipping copy. "
                     "Remove -C option to do the actual copy")
    elif args.list:
        # FIXME: Use a copy with no action
        c = Copy(args.output, node_filter)
        r = args.range
        c.fromCatalogue(
            cat_path, range=r, test=True,
            callback=lambda i, t, p, s, d: sys.stdout.write(
                "{0}\t{1}\t{2}\t{3}\t{4}\n".format(i, t, p, s, d)))
    elif args.output:
        logging.info("Copy catalogue's contents to {0}".format(args.output))
        c = Copy(args.output, node_filter)
        r = args.range
        if r:
            try:
                # "N-" means start at N with no upper bound (-1 sentinel)
                r = [int(_ or -1) for _ in r.split("-")]
            except ValueError:
                # FIX: the message was one literal broken across lines
                logging.error("Unsupported range format. Expects `start-end`")
                return -1
            logging.info("Using catalogue item range: {0}".format(r))
        if args.test:
            # FIX: dropped a no-op `.format(r)` on a message without
            # placeholders, and fixed the wording.
            logging.info("Test mode enabled (no actual file copy)")
        c.fromCatalogue(cat_path, range=r, test=args.test)
def fromCatalogue(self, path, range=None, test=False, callback=None):
    """Reads the given catalogue and copies directories, symlinks and files
    listed in the catalogue. Note that this expects the catalogue to be in
    traversal order.

    - `range` is an optional `(start, end)` index pair (end of -1 = open);
      when omitted, the last synced index is resumed from `self._indexPath`.
    - `test=True` performs a dry run (no actual copy).
    - `callback(i, t, p, source, destination)` is invoked per copied item."""
    logging.info("Opening catalogue: {0}".format(path))
    # The base is the common prefix/ancestor of all the paths in the
    # catalogue. The root changes but will always start with the base.
    base = None
    root = None
    self.test = test
    # When no range is specified, we look for the index path and load it,
    # but only if it is at least as recent as the catalogue itself.
    if (range is None and os.path.exists(self._indexPath)
            and os.stat(path)[stat.ST_MTIME]
            <= os.stat(self._indexPath)[stat.ST_MTIME]):
        with open(self._indexPath, "r") as f:
            r = f.read()
            try:
                r = int(r)
                range = (r, -1)
            except ValueError:
                # Unreadable index file: start from the beginning
                pass
    with open(path, "r") as f:
        for line in f:
            j_t_p = line.split(Catalogue.FIELD_SEPARATOR, 2)
            if len(j_t_p) != 3:
                logging.error(
                    "Malformed line, expecting at least 3 colon-separated values: {0}"
                    .format(repr(line)))
                continue
            j, t, p = j_t_p
            p = p[:-1]  # strip the trailing newline
            i = int(j)
            self.last = i
            if t == TYPE_BASE:
                # The first line of the catalogue is expected to be the base;
                # it is also expected to be absolute.
                self.base = base = p
                # FIX: grammar in message ("does not exists")
                assert os.path.exists(p), \
                    "Base directory does not exist: {0}".format(utf8(p))
                # Once we have the base, we can create rawcopy's DB files
                rd = os.path.join(self.output, "__rawcopy__")
                if not os.path.exists(rd):
                    logging.info(
                        "Creating rawcopy database directory {0}".format(
                            utf8(rd)))
                    os.makedirs(rd)
                self._open(os.path.join(rd, "copy.db"))
            elif t == TYPE_ROOT:
                # If we found a root, we ensure that it is prefixed with the
                # base
                assert base, "Catalogue must have a base directory before having roots"
                assert os.path.normpath(p).startswith(os.path.normpath(base)), \
                    "Catalogue roots must be prefixed by the base, base={0}, root={1}".format(
                        utf8(base), utf8(p))
                # Now we extract the suffix, which is the root minus the base
                # and no leading /
                self.root = root = p
                source = p
                suffix = p[len(self.base):]
                if suffix and suffix[0] == "/":
                    suffix = suffix[1:]
                # FIX: removed redundant nested single-argument os.path.join
                destination = os.path.join(self.output, suffix)
                if not (os.path.exists(destination)
                        and not os.path.islink(destination)):
                    pd = os.path.dirname(destination)
                    logging.info("Creating root: {0}:{1}".format(i, utf8(p)))
                    # We make sure the source exists
                    if not os.path.exists(source) and not os.path.islink(source):
                        # FIX: grammar in message ("does not exists")
                        logging.info("Root does not exist: {0}:{1}".format(
                            i, utf8(p)))
                    # TODO: How do we handle filters at this stage?
                    # We make sure the parent destination exists (it should
                    # be the case)
                    if not os.path.exists(pd):
                        # We copy the original parent directory
                        self.copydir(p, pd, suffix)
                    if os.path.isdir(source):
                        self.copydir(p, destination, suffix)
                    elif os.path.islink(source):
                        self.copylink(p, destination, suffix)
                    elif os.path.isfile(source):
                        self.copyfile(p, destination, suffix)
                    else:
                        logging.error(
                            "Unsupported root (not a dir/link/file): {0}:{1}"
                            .format(i, utf8(p)))
            else:
                # We skip the indexes that are not within the range, if given
                if range:
                    if i < range[0]:
                        continue
                    if len(range) > 1 and range[1] >= 0 and i > range[1]:
                        logging.info("Reached end of range {0} >= {1}".format(
                            i, range[1]))
                        break
                # We check if the filter matches
                if not self.match(p, t):
                    continue
                assert root and self.output
                # We prepare the source, suffix and destination
                source = os.path.join(root, p)
                assert source.startswith(base), \
                    "os.path.join(root={0}, path={1}) expected to start with base={2}".format(
                        repr(root), repr(p), repr(base))
                suffix = source[len(base):]
                if suffix[0] == "/":
                    suffix = suffix[1:]
                destination = os.path.join(self.output, suffix)
                # FIX: utf(p) was a NameError (undefined helper) -> utf8(p)
                assert suffix, \
                    "Empty suffix: source={0}, path={1}, destination={2}".format(
                        utf8(source), utf8(p), utf8(destination))
                # We now proceed with the actual copy
                if not (os.path.exists(source) or os.path.islink(source)):
                    logging.error("Source path not available: {0}:{1}".format(
                        i, utf8(source)))
                elif not (os.path.exists(destination)
                          or os.path.islink(destination)):
                    logging.info("Copying path [{2}] {0}:{1}".format(
                        i, utf8(p), t))
                    if t == TYPE_DIR or os.path.isdir(source):
                        if t != TYPE_DIR:
                            logging.warn(
                                "Source detected as directory, but typed as {0} -- {1}:{2}"
                                .format(t, i, utf8(p)))
                        self.copydir(source, destination, p)
                    elif t == TYPE_SYMLINK:
                        self.copylink(source, destination, p)
                    elif t == TYPE_FILE:
                        self.copyfile(source, destination, p)
                    else:
                        # FIX: dropped a stray third .format() argument for a
                        # message with only two placeholders
                        logging.error(
                            "Copy: line {0} unsupported type {1}".format(i, t))
                elif not self.test:
                    # We only go there if we're not in test mode
                    if t == TYPE_DIR:
                        logging.info(
                            "Skipping already copied directory: {0}:{1}".format(
                                i, utf8(destination)))
                    elif t == TYPE_SYMLINK:
                        logging.info(
                            "Skipping already copied link: {0}:{1}".format(
                                i, utf8(destination)))
                    elif t == TYPE_FILE:
                        logging.info(
                            "Skipping already copied file: {0}:{1}".format(
                                i, utf8(destination)))
                    # TODO: We should repair a damaged DB and make sure the
                    # inode is copied
                    self.ensureInodePath(source, suffix)
                # We call the callback
                if callback:
                    callback(i, t, p, source, destination)
            # We sync the database every 1000 items
            if j.endswith("000") and (not range or i >= range[0]):
                logging.info("{0} items processed, syncing db".format(i))
                self._sync(j)
    # We don't forget to close the DB
    self._close()
import reporter

patch.apply()
reporter.set_version("like-ff-2.1-try")  # set a version tag

# Seconds to wait before each like action, to throttle the bot.
SLEEP_BETWEEN_EACH_LIKE = 20
# Hashtags liked in rotation, one post each per pass.
LIKE_TAGS = ['love', 'instagood', 'photooftheday', 'fashion']

session = InstaPy(bypass_suspicious_attempt=True,
                  headless_browser=True,
                  use_firefox=True,
                  **reporter.Arguments().all())

with smart_run(session):
    # Runs forever; each pass sleeps before every like, mirroring the
    # original unrolled sequence of four sleep+like stanzas.
    while True:
        cur = time.time()
        try:
            for tag in LIKE_TAGS:
                time.sleep(SLEEP_BETWEEN_EACH_LIKE)
                session.like_by_tags([tag], amount=1, interact=False)
            # Log elapsed time for the whole pass
            logger.warning(time.time() - cur)
        except Exception as e:
            # Best-effort: report and keep the loop alive
            reporter.error(e)
def run(app=None, components=(), method=STANDALONE, name="retro",
        root=".", resetlog=False, address="", port=None, prefix='',
        asynchronous=False, sessions=False, withReactor=None,
        processStack=lambda x: x, runCondition=lambda: True, onError=None):
    """Runs this web application with the given method (easiest one is
    STANDALONE), with the given root (directory from where the web
    app-related resource will be resolved).

    This function is the 'main' for your web application, so this is
    basically the last call you should have in your web application main."""
    # FIX: use identity comparison with None
    if app is None:
        app = Application(prefix=prefix, components=components)
    else:
        for _ in components:
            app.register(_)
    # We set up the configuration if necessary
    config = app.config()
    if not config:
        config = Configuration(CONFIG)
    # Adjusts the working directory to basepath
    root = os.path.abspath(root)
    if os.path.isfile(root):
        root = os.path.dirname(root)
    # We set the application root to the given root, and do a chdir
    os.chdir(root)
    config.setdefault("root", root)
    config.setdefault("name", name)
    config.setdefault("logfile", name + ".log")
    if resetlog:
        # FIX: os.path.unlink does not exist (AttributeError) -> os.unlink
        os.unlink(config.logfile())
    # We set the configuration
    app.config(config)
    # And start the application
    app.start()
    # We start the WSGI stack
    stack = app._dispatcher
    stack = processStack(stack)
    # == FCGI (Flup-provided)
    if method == FCGI:
        if not has(FLUP):
            raise ImportError("Flup is required to run FCGI")
        fcgi_address = address or config.get("address")
        fcgi_port = port or config.get("port")
        if fcgi_port and fcgi_address:
            server = FLUP_FCGIServer(stack,
                                     bindAddress=(fcgi_address, fcgi_port))
        elif fcgi_address:
            server = FLUP_FCGIServer(stack, bindAddress=fcgi_address)
        else:
            server = FLUP_FCGIServer(stack)
        server.run()
    # == SCGI (Flup-provided)
    elif method == SCGI:
        if not has(FLUP):
            raise ImportError("Flup is required to run SCGI")
        fcgi_address = address or config.get("address")
        fcgi_port = port or config.get("port")
        if fcgi_port and fcgi_address:
            server = FLUP_SCGIServer(stack,
                                     bindAddress=(fcgi_address, fcgi_port))
        elif fcgi_address:
            server = FLUP_SCGIServer(stack, bindAddress=fcgi_address)
        else:
            server = FLUP_SCGIServer(stack)
        server.run()
    # == CGI
    elif method == CGI:
        environ = {}
        environ.update(os.environ)
        # From <http://www.python.org/dev/peps/pep-0333/#the-server-gateway-side>
        environ['wsgi.input'] = sys.stdin
        environ['wsgi.errors'] = sys.stderr
        environ['wsgi.version'] = (1, 0)
        environ['wsgi.multithread'] = False
        environ['wsgi.multiprocess'] = True
        environ['wsgi.run_once'] = True
        if environ.get('HTTPS', 'off') in ('on', '1'):
            environ['wsgi.url_scheme'] = 'https'
        else:
            environ['wsgi.url_scheme'] = 'http'
        # FIXME: Don't know if it's the proper solution
        req_uri = environ["REQUEST_URI"]
        script_name = environ["SCRIPT_NAME"]
        if req_uri.startswith(script_name):
            environ["PATH_INFO"] = req_uri[len(script_name):]
        else:
            environ["PATH_INFO"] = "/"
        if sessions:
            environ["com.saddi.service.session"] = sessions

        def start_response(status, headers, executionInfo=None):
            # CGI responses write headers straight to stdout
            for key, value in headers:
                print("%s: %s" % (key, value))
            print()
        # FIX: `self` is undefined inside this module-level function
        # (was: self.dispatcher) — dispatch through the WSGI stack instead.
        res = "".join(tuple(stack(environ, start_response)))
        print(res)
        if sessions:
            sessions.close()
    # == GEVENT, BJOERN, ROCKET & WSGI
    elif method in (GEVENT, BJOERN, ROCKET, WSGI):
        host = config.get("host")
        port = config.get("port")
        try:
            import reporter as logging
        except ImportError:
            import logging

        def application(environ, startResponse):
            # Gevent needs a wrapper
            if "retro.app" not in environ:
                environ["retro.app"] = stack.app()
            return environ["retro.app"](environ, startResponse)

        def logged_application(environ, startResponse):
            logging.info("{0} {1}".format(environ["REQUEST_METHOD"],
                                          environ["PATH_INFO"]))
            if "retro.app" not in environ:
                environ["retro.app"] = stack.app()
            return environ["retro.app"](environ, startResponse)

        # FIX: compared against the string "GEVENT" while the enclosing
        # branch tests the GEVENT constant — use the constant.
        if method == GEVENT:
            try:
                from gevent import wsgi
            except ImportError:
                raise ImportError("gevent is required to run `gevent` method")
            # NOTE: This starts using gevent's WSGI server (faster!)
            wsgi.WSGIServer((host, port), application,
                            spawn=None).serve_forever()
        elif method == BJOERN:
            try:
                import bjoern
            except ImportError:
                raise ImportError("bjoern is required to run `bjoern` method")
            bjoern.run(logged_application, host, port)
        elif method == ROCKET:
            try:
                import rocket
            except ImportError:
                raise ImportError("rocket is required to run `rocket` method")
            rocket.Rocket((host, int(port)), "wsgi",
                          {"wsgi_app": application}).start()
        elif method == WSGI:
            # When using standalone WSGI, we make sure to wrap RendezVous
            # objects that might be returned by the handlers, and make sure
            # we wait for them -- we could use a callback version instead
            # for specific web servers.
            def retro_rendezvous_wrapper(environ, start_response,
                                         request=None):
                results = stack(environ, start_response, request)
                for result in results:
                    if isinstance(result, RendezVous):
                        result.wait()
                        continue
                    yield result
            retro_rendezvous_wrapper.stack = stack
            return retro_rendezvous_wrapper
    # == STANDALONE (Retro WSGI server)
    elif method in (STANDALONE, AIO):
        try:
            import reporter as logging
        except ImportError:
            import logging
        server_address = (
            address or app.config("address") or DEFAULT_ADDRESS,
            int(port or app.config("port") or DEFAULT_PORT)
        )
        stack.fromRetro = True
        stack.app = lambda: app
        if method == STANDALONE and not asynchronous:
            import retro.wsgi
            try:
                server = retro.wsgi.WSGIServer(server_address, stack)
                retro.wsgi.onError(onError)
                socket = server.socket.getsockname()
                print("Retro embedded server listening on %s:%s"
                      % (socket[0], socket[1]))
            except Exception as e:
                logging.error(
                    "Retro: Cannot bind to {0}:{1}, error: {2}".format(
                        server_address[0], server_address[1], e))
                return -1
            # TODO: Support runCondition
            try:
                while runCondition():
                    server.handle_request()
            except KeyboardInterrupt:
                print("done")
        else:
            import retro.aio
            import asyncio
            # TODO: Support runCondition
            retro.aio.run(app, server_address[0], server_address[1])
    else:
        raise Exception("Unknown retro setup method:" + method)
def fromCatalogue(self, path, range=None, test=False, callback=None):
    """Reads the given catalogue and copies directories, symlinks and files
    listed in the catalogue. Note that this expects the catalogue to be in
    traversal order.

    - `range` is an optional `(start, end)` index pair (end of -1 = open);
      when omitted, the last synced index is resumed from `self._indexPath`.
    - `test=True` performs a dry run (no actual copy).
    - `callback(i, t, p, source, destination)` is invoked per copied item."""
    logging.info("Opening catalogue: {0}".format(path))
    # The base is the common prefix/ancestor of all the paths in the
    # catalogue. The root changes but will always start with the base.
    base = None
    root = None
    self.test = test
    # When no range is specified, we look for the index path and load it,
    # but only if it is at least as recent as the catalogue itself.
    if (range is None and os.path.exists(self._indexPath)
            and os.stat(path)[stat.ST_MTIME]
            <= os.stat(self._indexPath)[stat.ST_MTIME]):
        with open(self._indexPath, "r") as f:
            r = f.read()
            try:
                r = int(r)
                range = (r, -1)
            except ValueError:
                # Unreadable index file: start from the beginning
                pass
    with open(path, "r") as f:
        for line in f:
            j_t_p = line.split(Catalogue.FIELD_SEPARATOR, 2)
            if len(j_t_p) != 3:
                logging.error(
                    "Malformed line, expecting at least 3 colon-separated values: {0}"
                    .format(repr(line)))
                continue
            j, t, p = j_t_p
            p = p[:-1]  # strip the trailing newline
            i = int(j)
            self.last = i
            if t == TYPE_BASE:
                # The first line of the catalogue is expected to be the base;
                # it is also expected to be absolute.
                self.base = base = p
                # FIX: grammar in message ("does not exists")
                assert os.path.exists(p), \
                    "Base directory does not exist: {0}".format(utf8(p))
                # Once we have the base, we can create rawcopy's DB files
                rd = os.path.join(self.output, "__rawcopy__")
                if not os.path.exists(rd):
                    logging.info(
                        "Creating rawcopy database directory {0}".format(
                            utf8(rd)))
                    os.makedirs(rd)
                self._open(os.path.join(rd, "copy.db"))
            elif t == TYPE_ROOT:
                # If we found a root, we ensure that it is prefixed with the
                # base
                assert base, "Catalogue must have a base directory before having roots"
                assert os.path.normpath(p).startswith(os.path.normpath(base)), \
                    "Catalogue roots must be prefixed by the base, base={0}, root={1}".format(
                        utf8(base), utf8(p))
                # Now we extract the suffix, which is the root minus the base
                # and no leading /
                self.root = root = p
                source = p
                suffix = p[len(self.base):]
                if suffix and suffix[0] == "/":
                    suffix = suffix[1:]
                # FIX: removed redundant nested single-argument os.path.join
                destination = os.path.join(self.output, suffix)
                if not (os.path.exists(destination)
                        and not os.path.islink(destination)):
                    pd = os.path.dirname(destination)
                    logging.info("Creating root: {0}:{1}".format(i, utf8(p)))
                    # We make sure the source exists
                    if not os.path.exists(source) and not os.path.islink(source):
                        # FIX: grammar in message ("does not exists")
                        logging.info("Root does not exist: {0}:{1}".format(
                            i, utf8(p)))
                    # TODO: How do we handle filters at this stage?
                    # We make sure the parent destination exists (it should
                    # be the case)
                    if not os.path.exists(pd):
                        # We copy the original parent directory
                        self.copydir(p, pd, suffix)
                    if os.path.isdir(source):
                        self.copydir(p, destination, suffix)
                    elif os.path.islink(source):
                        self.copylink(p, destination, suffix)
                    elif os.path.isfile(source):
                        self.copyfile(p, destination, suffix)
                    else:
                        logging.error(
                            "Unsupported root (not a dir/link/file): {0}:{1}"
                            .format(i, utf8(p)))
            else:
                # We skip the indexes that are not within the range, if given
                if range:
                    if i < range[0]:
                        continue
                    if len(range) > 1 and range[1] >= 0 and i > range[1]:
                        logging.info("Reached end of range {0} >= {1}".format(
                            i, range[1]))
                        break
                # We check if the filter matches
                if not self.match(p, t):
                    continue
                assert root and self.output
                # We prepare the source, suffix and destination
                source = os.path.join(root, p)
                assert source.startswith(base), \
                    "os.path.join(root={0}, path={1}) expected to start with base={2}".format(
                        repr(root), repr(p), repr(base))
                suffix = source[len(base):]
                if suffix[0] == "/":
                    suffix = suffix[1:]
                destination = os.path.join(self.output, suffix)
                # FIX: utf(p) was a NameError (undefined helper) -> utf8(p)
                assert suffix, \
                    "Empty suffix: source={0}, path={1}, destination={2}".format(
                        utf8(source), utf8(p), utf8(destination))
                # We now proceed with the actual copy
                if not (os.path.exists(source) or os.path.islink(source)):
                    logging.error("Source path not available: {0}:{1}".format(
                        i, utf8(source)))
                elif not (os.path.exists(destination)
                          or os.path.islink(destination)):
                    logging.info("Copying path [{2}] {0}:{1}".format(
                        i, utf8(p), t))
                    if t == TYPE_DIR or os.path.isdir(source):
                        if t != TYPE_DIR:
                            logging.warn(
                                "Source detected as directory, but typed as {0} -- {1}:{2}"
                                .format(t, i, utf8(p)))
                        self.copydir(source, destination, p)
                    elif t == TYPE_SYMLINK:
                        self.copylink(source, destination, p)
                    elif t == TYPE_FILE:
                        self.copyfile(source, destination, p)
                    else:
                        # FIX: dropped a stray third .format() argument for a
                        # message with only two placeholders
                        logging.error(
                            "Copy: line {0} unsupported type {1}".format(i, t))
                elif not self.test:
                    # We only go there if we're not in test mode
                    if t == TYPE_DIR:
                        logging.info(
                            "Skipping already copied directory: {0}:{1}".format(
                                i, utf8(destination)))
                    elif t == TYPE_SYMLINK:
                        logging.info(
                            "Skipping already copied link: {0}:{1}".format(
                                i, utf8(destination)))
                    elif t == TYPE_FILE:
                        logging.info(
                            "Skipping already copied file: {0}:{1}".format(
                                i, utf8(destination)))
                    # TODO: We should repair a damaged DB and make sure the
                    # inode is copied
                    self.ensureInodePath(source, suffix)
                # We call the callback
                if callback:
                    callback(i, t, p, source, destination)
            # We sync the database every 1000 items
            if j.endswith("000") and (not range or i >= range[0]):
                logging.info("{0} items processed, syncing db".format(i))
                self._sync(j)
    # We don't forget to close the DB
    self._close()