Ejemplo n.º 1
0
    def run(self, app, args):
        """Dispatch the developer sub-command named by ``args.devcmd``.

        A recognised command ``<name>`` is forwarded to
        ``self.run_<name>(app, args)``; the handler's return value is
        discarded.

        Raises:
            extensions.CommandUsageError: if ``args.devcmd`` is falsy.
            NotImplementedError: if ``args.devcmd`` is set but is not one of
                the recognised commands.
        """
        known_commands = (
            'fetch',
            'parse',
            'scrape',
            'analyze',
            'query',
            'query2',
            'download',
        )

        for name in known_commands:
            if args.devcmd == name:
                # The handler is looked up only once its command matched.
                getattr(self, 'run_' + name)(app, args)
                return

        if not args.devcmd:
            raise extensions.CommandUsageError()

        raise NotImplementedError()
Ejemplo n.º 2
0
    def run(self, app, args):
        """List the current downloads or cancel one of them.

        With ``args.list`` set, a table built from the sources returned by
        ``app.get_downloads()`` is displayed.  Otherwise, with ``args.cancel``
        set, the source whose first dataset column (requested as ``crc32``)
        equals ``args.cancel`` is handed to ``app.cancel``; an unknown value
        only prints an error message.

        Raises:
            extensions.CommandUsageError: if neither option was given.
        """
        if args.list:
            headers = ["id", "state", "name", "size", "progress"]
            fields = ["crc32", "state", "name", "size", "progress"]

            table = uilib.build_dataset(
                self.srvs.db,
                fields,
                [source for source, _state in app.get_downloads()],
            )
            uilib.display_data(table, labels=headers)
            return

        if not args.cancel:
            raise extensions.CommandUsageError()

        rows = uilib.build_dataset(
            self.srvs.db,
            ["crc32", "raw_source"],
            [source for source, _state in app.get_downloads()],
        )
        # Index the second column (raw_source) by the first one (crc32).
        by_id = dict((row[0], row[1]) for row in rows)

        if args.cancel in by_id:
            app.cancel(by_id[args.cancel])
        else:
            print("Error: Invalid ID")
Ejemplo n.º 3
0
    def do_query2(self, app, args):
        """Scrape sources for a query, filter them and write the result as JSON.

        The query comes from ``args.querystring`` and/or ``args.queryparams``
        (an iterable of ``key=value`` strings).  Filters are built before
        anything is scraped.  The sorted matches are written to
        ``args.output`` as a JSON list of ``[entity, [source, ...]]`` items.

        Raises:
            extensions.CommandUsageError: if no query was supplied, or if the
                query engine raises ``MissingFiltersError``.
            ValueError: if an item of ``args.queryparams`` lacks ``=`` or has
                an empty key or value.
        """
        def _params_to_dict(items):
            # Split every "key=value" item on its first '=' only.
            parsed = {}
            for item in items:
                name, val = item.split('=', 1)
                if not (name and val):
                    raise ValueError(item)

                parsed[name] = val

            return parsed

        if not (args.queryparams or args.querystring):
            print("filter or querystring are requierd", file=sys.stderr)
            raise extensions.CommandUsageError()

        q = {}
        if args.querystring:
            q = query.Query.fromstring(args.querystring)

        # NOTE(review): when both options are given, the query built from
        # queryparams replaces the one parsed from querystring -- confirm
        # that this is intended.
        if args.queryparams:
            params = _params_to_dict(args.queryparams)
            q = query.Query(**params)

        # Set up the filters before scraping anything
        query_engine = query.Engine()
        try:
            filters = query_engine.build_filter(q)
        except query.MissingFiltersError as exc:
            print("Unknow filters: %s" % ', '.join(exc.args[0]),
                  file=sys.stderr)
            raise extensions.CommandUsageError()

        # Build the scrape contexts and process them
        scrape_engine = scraper.Engine()
        contexts = scrape_engine.build_contexts_for_query(q)
        scraped = scrape_engine.process(*contexts)
        scraped = analyze.analyze(*scraped)

        # Run the scraped sources through the filters
        matches = query_engine.apply(filters, scraped)
        matches = query_engine.sort(matches)

        # Output
        payload = [
            [entity.dict(), [item.dict() for item in matched]]
            for entity, matched in matches
        ]
        text = json.dumps(payload, indent=2, default=_json_encode_hook)
        args.output.write(text)
Ejemplo n.º 4
0
    def run_fetch(self, app, args):
        """Fetch a single scrape context and write the result to ``args.output``.

        The context is built from ``args.provider`` and ``args.uri`` by a
        scraper engine created with ``app.srvs``.

        Raises:
            extensions.CommandUsageError: if neither ``args.provider`` nor
                ``args.uri`` is set.
        """
        if not (args.provider or args.uri):
            raise extensions.CommandUsageError()

        fetcher = scraper.Engine(app.srvs)
        context = fetcher.build_context(args.provider, args.uri)
        fetched = fetcher.fetch_one(context)
        args.output.write(fetched)
Ejemplo n.º 5
0
    def do_query(self, app, args):
        """Filter sources read from ``args.input`` and write the matches as JSON.

        The query comes from ``args.querystring`` and/or ``args.queryparams``
        (an iterable of ``key=value`` strings).  The input is parsed as a JSON
        list whose items are turned into ``schema.Source`` objects, then
        filtered and sorted by the query engine.  The result is written to
        ``args.output`` as a JSON list of ``[entity, [source, ...]]`` items.

        Raises:
            extensions.CommandUsageError: if no query was supplied, or if the
                query engine raises ``MissingFiltersError``.
            ValueError: if an item of ``args.queryparams`` lacks ``=`` or has
                an empty key or value.
        """
        def _params_to_dict(items):
            # Split every "key=value" item on its first '=' only.
            parsed = {}
            for item in items:
                name, val = item.split('=', 1)
                if not (name and val):
                    raise ValueError(item)

                parsed[name] = val

            return parsed

        if not (args.queryparams or args.querystring):
            print("filter or querystring are requierd", file=sys.stderr)
            raise extensions.CommandUsageError()

        q = {}
        if args.querystring:
            q.update(query.Query.fromstring(args.querystring))

        # NOTE(review): when both options are given, the query built from
        # queryparams replaces the dict filled from querystring -- confirm
        # that this is intended.
        if args.queryparams:
            params = _params_to_dict(args.queryparams)
            q = query.Query(**params)

        engine = query.Engine()
        try:
            filters = engine.build_filter(q)
        except query.MissingFiltersError as exc:
            print("Unknow filters: %s" % ', '.join(exc.args[0]),
                  file=sys.stderr)
            raise extensions.CommandUsageError()

        # Rebuild the Source objects from the JSON input
        records = json.loads(args.input.read())
        candidates = [schema.Source(**record) for record in records]

        matches = engine.apply(filters, candidates)
        matches = engine.sort(matches)

        payload = [
            [entity.dict(), [item.dict() for item in matched]]
            for entity, matched in matches
        ]
        text = json.dumps(payload, indent=2, default=_json_encode_hook)
        args.output.write(text)
Ejemplo n.º 6
0
    def run_scrape(self, app, args):
        """Process scrape contexts and write the results as JSON.

        Contexts are built by ``build_n_contexts`` from ``args.iterations``,
        ``args.provider`` and ``args.uri`` plus the ``type`` and ``language``
        keyword options.  Every processed result is serialised through its
        ``dict()`` method and the list is written to ``args.output``.

        Raises:
            extensions.CommandUsageError: if neither ``args.provider`` nor
                ``args.uri`` is set.
        """
        if not (args.provider or args.uri):
            raise extensions.CommandUsageError()

        engine = scraper.Engine(app.srvs)
        contexts = engine.build_n_contexts(
            args.iterations,
            args.provider,
            args.uri,
            type=args.type,
            language=args.language,
        )
        scraped = engine.process(*contexts)

        document = json.dumps([item.dict() for item in scraped], indent=2)
        args.output.write(document)
Ejemplo n.º 7
0
    def run_download(self, app, args):
        """Print the active downloads or add new ones read from ``args.input``.

        With ``args.list`` the ``repr`` of the active downloads is printed.
        Otherwise, with ``args.add``, the input is parsed as a JSON list of
        ``[entity, [source, ...]]`` pairs and the first source of every pair
        is added.  An ``ExtensionError`` raised while adding is reported and
        the loop continues with the next pair.

        Raises:
            extensions.CommandUsageError: if neither option was given.
        """
        manager = downloads.Downloads()

        if args.list:
            print(repr(manager.get_active()))
            return

        if not args.add:
            raise extensions.CommandUsageError()

        raw = json.loads(args.input.read())
        # Entities are instantiated as well, although only sources are used.
        entries = [
            (schema.Entity(**entity), [schema.Source(**item) for item in items])
            for entity, items in raw
        ]

        for _entity, candidates in entries:
            # NOTE(review): an empty source list raises IndexError here.
            first = candidates[0]
            try:
                manager.add(first)
            except extensions.ExtensionError as exc:
                print("Add '%s' failed. Extension error: %r" % (first, exc))