def run(self, app, args):
    """Dispatch the dev sub-command in ``args.devcmd`` to its handler.

    An empty/missing sub-command is a usage error; an unrecognized
    (non-empty) one is treated as not implemented.
    """
    # Map sub-command names to handler *names*; resolving lazily with
    # getattr keeps the original behavior of only touching the handler
    # that is actually requested.
    dispatch = {
        'fetch': 'run_fetch',
        'parse': 'run_parse',
        'scrape': 'run_scrape',
        'analyze': 'run_analyze',
        'query': 'run_query',
        'query2': 'run_query2',
        'download': 'run_download',
    }

    if not args.devcmd:
        raise extensions.CommandUsageError()

    handler_name = dispatch.get(args.devcmd)
    if handler_name is None:
        raise NotImplementedError()

    getattr(self, handler_name)(app, args)
def run(self, app, args):
    """Downloads command entry point: list active downloads or cancel one."""
    if args.list:
        # One row per active download; the crc32 value is shown to the
        # user under the "id" label.
        headers = ["id", "state", "name", "size", "progress"]
        fields = ["crc32", "state", "name", "size", "progress"]
        sources = [src for (src, state) in app.get_downloads()]
        rows = uilib.build_dataset(self.srvs.db, fields, sources)
        uilib.display_data(rows, labels=headers)

    elif args.cancel:
        # Resolve the user-supplied ID (crc32) back to the raw source
        # object that app.cancel() expects.
        sources = [src for (src, state) in app.get_downloads()]
        rows = uilib.build_dataset(self.srvs.db, ["crc32", "raw_source"], sources)
        id_to_source = {row[0]: row[1] for row in rows}
        if args.cancel not in id_to_source:
            print("Error: Invalid ID")
            return
        app.cancel(id_to_source[args.cancel])

    else:
        raise extensions.CommandUsageError()
def do_query2(self, app, args):
    """Scrape sources matching a query and write them as JSON to ``args.output``.

    The query is built from ``args.querystring`` and/or the key=value
    pairs in ``args.queryparams``; when both are supplied, the explicit
    params take precedence.

    Raises:
        extensions.CommandUsageError: if no query was supplied, or the
            query references filters no extension provides.
    """
    def _parse_queryparams(pairs):
        # Split 'key=value' strings; both halves must be non-empty.
        for pair in pairs:
            key, value = pair.split('=', 1)
            if not key or not value:
                raise ValueError(pair)
            yield (key, value)

    if not args.queryparams and not args.querystring:
        # Fixed typo in the original message ("requierd").
        errmsg = "filter or querystring are required"
        print(errmsg, file=sys.stderr)
        raise extensions.CommandUsageError()

    if args.querystring:
        q = query.Query.fromstring(args.querystring)
    if args.queryparams:
        # Explicit key=value params override the querystring form.
        params = dict(_parse_queryparams(args.queryparams))
        q = query.Query(**params)

    # Build the filters up-front so an invalid query fails before any
    # scraping work is done.
    query_engine = query.Engine()
    try:
        filters = query_engine.build_filter(q)
    except query.MissingFiltersError as e:
        # Fixed typo in the original message ("Unknow").
        errmsg = "Unknown filters: %s" % ', '.join(e.args[0])
        print(errmsg, file=sys.stderr)
        raise extensions.CommandUsageError()

    # Build scrape contexts for the query, run them, and analyze the
    # scraped sources.
    scrape_engine = scraper.Engine()
    ctxs = scrape_engine.build_contexts_for_query(q)
    sources = scrape_engine.process(*ctxs)
    sources = analyze.analyze(*sources)

    # Pass sources through the filters and sort the survivors.
    results = query_engine.apply(filters, sources)
    results = query_engine.sort(results)

    # Serialize as [[entity, [source, ...]], ...].
    results = [[entity.dict(), [src.dict() for src in sources]]
               for (entity, sources) in results]
    output = json.dumps(results, indent=2, default=_json_encode_hook)
    args.output.write(output)
def run_fetch(self, app, args):
    """Fetch one URI through the scraper engine and write the raw result.

    Raises:
        extensions.CommandUsageError: if neither a provider nor a URI
            was supplied.
    """
    # At least one of provider/uri is needed to build a fetch context.
    if not (args.provider or args.uri):
        raise extensions.CommandUsageError()

    engine = scraper.Engine(app.srvs)
    ctx = engine.build_context(args.provider, args.uri)
    fetched = engine.fetch_one(ctx)
    args.output.write(fetched)
def do_query(self, app, args):
    """Filter pre-scraped sources (JSON on ``args.input``) and write the
    sorted matches as JSON to ``args.output``.

    The query is built from ``args.querystring`` and/or the key=value
    pairs in ``args.queryparams``; when both are supplied, the explicit
    params take precedence (mirrors ``do_query2``).

    Raises:
        extensions.CommandUsageError: if no query was supplied, or the
            query references filters no extension provides.
    """
    def _parse_queryparams(pairs):
        # Split 'key=value' strings; both halves must be non-empty.
        for pair in pairs:
            key, value = pair.split('=', 1)
            if not key or not value:
                raise ValueError(pair)
            yield (key, value)

    if not args.queryparams and not args.querystring:
        # Fixed typo in the original message ("requierd").
        errmsg = "filter or querystring are required"
        print(errmsg, file=sys.stderr)
        raise extensions.CommandUsageError()

    # Consistency fix: the original did `q = {}` then
    # `q.update(query.Query.fromstring(...))`, handing a plain dict to
    # build_filter(), while do_query2 hands it a query.Query. Use the
    # do_query2 pattern so both commands build the query the same way.
    if args.querystring:
        q = query.Query.fromstring(args.querystring)
    if args.queryparams:
        # Explicit key=value params override the querystring form.
        params = dict(_parse_queryparams(args.queryparams))
        q = query.Query(**params)

    engine = query.Engine()
    try:
        # Renamed from the misleading `ctx`: build_filter returns filters.
        filters = engine.build_filter(q)
    except query.MissingFiltersError as e:
        # Fixed typo in the original message ("Unknow").
        errmsg = "Unknown filters: %s" % ', '.join(e.args[0])
        print(errmsg, file=sys.stderr)
        raise extensions.CommandUsageError()

    # Sources come from the input stream (previously scraped), not from
    # a live scrape.
    data = json.loads(args.input.read())
    data = [schema.Source(**x) for x in data]

    results = engine.apply(filters, data)
    results = engine.sort(results)

    # Serialize as [[entity, [source, ...]], ...].
    results = [[entity.dict(), [src.dict() for src in sources]]
               for (entity, sources) in results]
    output = json.dumps(results, indent=2, default=_json_encode_hook)
    args.output.write(output)
def run_scrape(self, app, args):
    """Run the scraper for ``args.iterations`` rounds and dump results as JSON.

    Raises:
        extensions.CommandUsageError: if neither a provider nor a URI
            was supplied.
    """
    if not (args.provider or args.uri):
        raise extensions.CommandUsageError()

    engine = scraper.Engine(app.srvs)
    ctxs = engine.build_n_contexts(args.iterations, args.provider, args.uri,
                                   type=args.type, language=args.language)
    scraped = engine.process(*ctxs)

    payload = json.dumps([item.dict() for item in scraped], indent=2)
    args.output.write(payload)
def run_download(self, app, args):
    """Dev download command: list active downloads or add new ones from JSON."""
    manager = downloads.Downloads()

    if args.list:
        print(repr(manager.get_active()))

    elif args.add:
        # Input is JSON shaped as a list of [entity, [source, ...]] pairs.
        raw = json.loads(args.input.read())
        parsed = [
            (schema.Entity(**key), [schema.Source(**src) for src in collection])
            for (key, collection) in raw
        ]
        for (entity, collection) in parsed:
            # Only the first source of each collection is queued.
            # NOTE(review): assumes every collection is non-empty — an
            # empty one would raise IndexError here; confirm upstream.
            try:
                manager.add(collection[0])
            except extensions.ExtensionError as e:
                print("Add '%s' failed. Extension error: %r" % (collection[0], e))

    else:
        raise extensions.CommandUsageError()