Exemple #1
0
def run(input_files: KGTKFiles,
        **options):
    """Run Kypher query according to the provided command-line arguments.

    Opens the SQLite graph cache named by the 'graph_cache_file' option
    (creating it if the file does not exist yet), builds a Kypher query from
    the remaining options and writes either the query explanation or the
    tab-separated result rows to the selected output.

    Args:
        input_files: input file specification, resolved through
            'KGTKArgumentParser.get_input_file_list'.
        **options: parsed command-line options.  The keys read here are
            '_debug', '_expert', 'output', 'regular_paras', 'string_paras',
            'lqstring_paras', 'graph_cache_file', 'input_file_options',
            'query', 'match', 'where', 'return_', 'order', 'skip', 'limit',
            'index', 'explain' and 'no_header'.

    Raises:
        KGTKException: if no input was supplied; any other exception raised
            while setting up or running the query is re-raised wrapped in a
            KGTKException carrying the original message.
    """
    try:
        import_modules()
        debug = options.get('_debug', False)
        expert = options.get('_expert', False)
        loglevel = 1 if debug else 0

        if debug and expert:
            loglevel = 2
            print('OPTIONS:', options)

        # normalize path objects to strings:
        inputs = [str(f) for f in KGTKArgumentParser.get_input_file_list(input_files)]
        if not inputs:
            raise KGTKException('At least one input needs to be supplied')

        # parse parameters before opening the output, so that a parse error
        # cannot leave an already opened output file behind unclosed:
        parameters = parse_query_parameters(regular=options.get('regular_paras') or [],
                                            string=options.get('string_paras') or [],
                                            lqstring=options.get('lqstring_paras') or [])

        output = options.get('output')
        if output == '-':
            output = sys.stdout
        if isinstance(output, str):
            output = sqlstore.open_to_write(output, mode='wt')

        # bind 'store' before entering the try block, so that the cleanup code
        # below does not fail with an unbound name (and thereby hide the real
        # error) in case opening the store itself raises:
        store = None
        try:
            graph_cache = options.get('graph_cache_file')
            store = sqlstore.SqliteStore(graph_cache, create=not os.path.exists(graph_cache), loglevel=loglevel)

            query = kyquery.KgtkQuery(inputs, store, loglevel=loglevel,
                                      options=options.get('input_file_options'),
                                      query=options.get('query'),
                                      match=options.get('match'),
                                      where=options.get('where'),
                                      ret=options.get('return_'),
                                      order=options.get('order'),
                                      skip=options.get('skip'),
                                      limit=options.get('limit'),
                                      parameters=parameters,
                                      index=options.get('index'))

            explain = options.get('explain')
            if explain is not None:
                result = query.explain(explain)
                output.write(result)
            else:
                result = query.execute()
                # we are forcing \n line endings here instead of \r\n, since those
                # can be re/imported efficiently with the new SQLite import command;
                # we force `escapechar' back to None to avoid generation of double
                # backslashes as in 'Buffalo \'66', which in turn will now raise errors
                # if separators in fields are encountered (which seems what we want):
                csvwriter = csv.writer(output, dialect=None, delimiter='\t',
                                       quoting=csv.QUOTE_NONE, quotechar=None,
                                       lineterminator='\n',
                                       escapechar=None)
                if not options.get('no_header'):
                    csvwriter.writerow(query.result_header)
                csvwriter.writerows(result)

            output.flush()
        finally:
            if store is not None:
                store.close()
            if output is not None and output is not sys.stdout:
                output.close()

    except sh.SignalException_SIGPIPE:
        # hack to work around Python3 issue when stdout is gone when we try to report an exception;
        # without this we get an ugly 'Exception ignored...' msg when we quit with head or a pager:
        sys.stdout = os.fdopen(1)
    except KGTKException:
        raise
    except Exception as e:
        # keep the original exception attached as the explicit cause:
        raise KGTKException(str(e) + '\n') from e
Exemple #2
0
    def _define(self,
                inputs=None,
                doc=None,
                name=None,
                maxcache=None,
                query=None,
                match='()',
                where=None,
                opt=None,
                owhere=None,
                opt2=None,
                owhere2=None,
                with_='*',
                wwhere=None,
                ret='*',
                order=None,
                skip=None,
                limit=None,
                parameters=None,
                force=False,
                index=None,
                loglevel=None,
                **kwargs):
        """Internal constructor which generates a cached query translation and a
        LRU-cachable results structure (if requested).  See 'KypherApi.get_query'
        for documentation of arguments.

        Defaults resolved here: 'inputs' falls back to all inputs known to the
        API; 'loglevel', 'index', 'limit' and 'maxcache' fall back to the
        corresponding API settings when None; a 'limit' of -1 requests unlimited
        results; 'parameters' defaults to a fresh empty dict.  Additional
        optional clauses can be supplied through keyword arguments whose names
        start with 'opt', each optionally followed by a keyword argument whose
        name starts with 'owhere'.

        Returns this query object.  If 'name' is given, the query is also
        registered in the API's cached queries under that name.

        Raises KGTKException if the query has already been defined, if an
        unexpected keyword argument is supplied, or if an 'owhere...' keyword
        argument does not follow an 'opt...' keyword argument.
        """

        if self.kgtk_query is not None:
            raise KGTKException('query has already been defined')

        # use a fresh dict per call instead of a shared mutable default:
        if parameters is None:
            parameters = {}

        inputs = kyquery.listify(inputs) or self.api.get_all_inputs()
        norm_inputs = []
        for inp in inputs:
            if self.api.get_input_info(inp) is None:
                self.api.add_input(inp)
            norm_inputs.append(self.api.get_input(inp))
        inputs = norm_inputs

        self.docstring = doc

        optionals = []
        if opt:
            optionals.append(
                (self._subst_graph_handles(opt, inputs), owhere))
        if opt2:
            optionals.append(
                (self._subst_graph_handles(opt2, inputs), owhere2))
        # kwargs is an ordered dict, so the actual suffixes do not matter:
        for key, value in kwargs.items():
            if key.startswith('opt'):
                optionals.append(
                    [self._subst_graph_handles(value, inputs), None])
            elif key.startswith('owhere'):
                # only entries created from an 'opt...' keyword are mutable
                # lists; report a missing partner clause as a regular error
                # instead of failing with an IndexError or TypeError:
                if not optionals or not isinstance(optionals[-1], list):
                    raise KGTKException(
                        'Optional where clause without preceding optional clause: %s' % key)
                optionals[-1][1] = value
            else:
                raise KGTKException('Unexpected keyword argument: %s' % key)

        store = self.api.get_sql_store()
        if loglevel is None:
            loglevel = self.api.loglevel
        if index is None:
            index = self.api.index_mode
        # since we are loading results into memory, we are not using unlimited as the default:
        if limit is None:
            limit = self.api.max_results
        # -1 forces unlimited results:
        elif limit == -1:
            limit = None
        if maxcache is None:
            maxcache = self.api.max_cache_size
        self.kgtk_query = kyquery.KgtkQuery(inputs,
                                            store,
                                            loglevel=loglevel,
                                            index=index,
                                            query=query,
                                            match=self._subst_graph_handles(
                                                match, inputs),
                                            where=where,
                                            optionals=optionals,
                                            with_=(with_, wwhere),
                                            ret=ret,
                                            order=order,
                                            skip=skip,
                                            limit=limit,
                                            parameters=parameters,
                                            force=force)

        self.kgtk_query.defer_params = True
        state = self.kgtk_query.translate_to_sql()
        self.sql = state.get_sql()
        self.parameters = state.get_parameters()
        self.kgtk_query.ensure_relevant_indexes(state)
        # create memoizable execution wrapper:
        self.exec_wrapper = lambda q, p, f: q._exec(p, f)
        if maxcache > 0:
            self.exec_wrapper = lru_cache(maxsize=maxcache)(self.exec_wrapper)
        if name is not None:
            self.api.cached_queries[name] = self
        self.timestamp = self.api.timestamp
        return self
Exemple #3
0
def run(input_files: KGTKFiles, **options):
    """Execute a Kypher query described by the parsed command-line options.

    The raw options are first normalized by 'preprocess_query_options'.  If
    the 'show_cache' option is set, only a description of the graph cache's
    meta tables is printed to stdout.  Otherwise the query is built and either
    explained or executed, with result rows written tab-separated to the
    stream found under the 'output' option.

    Raises KGTKException if no input was supplied (and the cache is not just
    being shown); any other exception is re-raised as a KGTKException that
    carries the original message.
    """
    try:
        options = preprocess_query_options(input_files=input_files, **options)
        show_cache = options.get('show_cache')
        inputs = options.get('input_files')
        if len(inputs) == 0 and not show_cache:
            raise KGTKException('At least one input needs to be supplied')
        output = options['output']
        loglevel = options.get('loglevel')

        store = None
        try:
            cache_file = options.get('graph_cache_file')
            # the cache gets created only if its file does not exist yet:
            cache_is_new = not os.path.exists(cache_file)
            store = sqlstore.SqliteStore(cache_file, create=cache_is_new,
                                         loglevel=loglevel, readonly=options.get('readonly'))

            if show_cache:
                store.describe_meta_tables(out=sys.stdout)
                return

            # NOTE(review): the text of the 'import' option is executed as Python
            # code inside the sqlstore module namespace - confirm that this option
            # can only ever be set by a trusted user.
            extra_imports = options.get('import')
            if extra_imports:
                exec('import ' + extra_imports, sqlstore.__dict__)

            # collect the query clauses and settings taken from the options:
            query_spec = {
                'options': options.get('input_file_options'),
                'query': options.get('query'),
                'match': options.get('match'),
                'where': options.get('where'),
                'optionals': options.get('optionals'),
                'with_': options.get('with'),
                'ret': options.get('return'),
                'order': options.get('order'),
                'skip': options.get('skip'),
                'limit': options.get('limit'),
                'parameters': options.get('parameters'),
                'index': options.get('index_mode'),
                'force': options.get('force'),
            }
            query = kyquery.KgtkQuery(inputs, store, loglevel=loglevel, **query_spec)

            explain = options.get('explain')
            if explain is None:
                rows = query.execute()
                # \n line endings are forced here instead of \r\n, since those can
                # be re/imported efficiently with the new SQLite import command;
                # `escapechar' is forced back to None to avoid generation of double
                # backslashes as in 'Buffalo \'66', which in turn will now raise
                # errors if separators in fields are encountered (which seems to
                # be what we want):
                writer = csv.writer(output, dialect=None, delimiter='\t',
                                    quoting=csv.QUOTE_NONE, quotechar=None,
                                    lineterminator='\n',
                                    escapechar=None)
                if not options.get('no_header'):
                    writer.writerow(query.result_header)
                writer.writerows(rows)
            else:
                output.write(query.explain(explain))

            output.flush()
        finally:
            if store is not None:
                store.close()
            # never close stdout, it does not belong to us:
            if not (output is None or output is sys.stdout):
                output.close()

    except sh.SignalException_SIGPIPE:
        # hack to work around a Python3 issue when stdout is gone by the time we try to
        # report an exception; without this we get an ugly 'Exception ignored...' msg
        # when we quit with head or a pager:
        sys.stdout = os.fdopen(1)
    except KGTKException as kgtk_error:
        raise kgtk_error
    except Exception as error:
        raise KGTKException(str(error) + '\n')
Exemple #4
0
    def _define(self,
                inputs=None,
                doc=None,
                name=None,
                maxcache=None,
                query=None,
                match='()',
                where=None,
                opt=None,
                owhere=None,
                opt2=None,
                owhere2=None,
                with_='*',
                wwhere=None,
                ret='*',
                order=None,
                skip=None,
                limit=None,
                parameters=None,
                force=False,
                index=None,
                loglevel=None,
                cmd=None,
                **kwargs):
        """Internal constructor which generates a cached query translation and a
        LRU-cachable results structure (if requested).  See 'KypherApi.get_query'
        for documentation of arguments.

        'cmd' is parsed with 'parse_query_command'; any options it specifies
        take precedence over the corresponding arguments, except for optional
        clauses, which are merged (clauses from 'cmd' come first).  Remaining
        defaults resolved here: 'inputs' falls back to all inputs known to the
        API; 'loglevel', 'index', 'limit' and 'maxcache' fall back to the
        corresponding API settings; a 'limit' of -1 (or '-1') requests unlimited
        results; 'parameters' defaults to a fresh empty dict.  Additional
        optional clauses can be supplied through keyword arguments whose names
        start with 'opt', each optionally followed by a keyword argument whose
        name starts with 'owhere'.

        Returns this query object.  If a name is given, the query is also
        registered in the API's cached queries under that name.

        Raises KGTKException if the query has already been defined, if 'name'
        and the name given in 'cmd' differ, if an unexpected keyword argument
        is supplied, or if an 'owhere...' keyword argument does not follow an
        'opt...' keyword argument.
        """

        if self.kgtk_query is not None:
            raise KGTKException('query has already been defined')

        # use a fresh dict per call instead of a shared mutable default:
        if parameters is None:
            parameters = {}

        # any options specified in 'cmd' dominate others or get merged with them:
        cmd = parse_query_command(cmd)
        if name is not None and cmd.get('name', name) != name:
            raise KGTKException('multiply defined mismatched query names')
        name = cmd.get('name', name)

        inputs = cmd.get('input_files') or kyquery.listify(
            inputs) or self.api.get_all_inputs()
        norm_inputs = []
        for inp in inputs:
            if self.api.get_input_info(inp) is None:
                self.api.add_input(inp)
            norm_inputs.append(self.api.get_input(inp))
        inputs = norm_inputs

        self.docstring = cmd.get('docstring') or doc

        # distinct loop names avoid shadowing the 'opt' and 'where' arguments:
        optionals = [(self._subst_graph_handles(cmd_opt, inputs), cmd_where)
                     for cmd_opt, cmd_where in cmd.get('optionals', [])]
        if opt:
            optionals.append(
                (self._subst_graph_handles(opt, inputs), owhere))
        if opt2:
            optionals.append(
                (self._subst_graph_handles(opt2, inputs), owhere2))
        # kwargs is an ordered dict, so the actual suffixes do not matter:
        for key, value in kwargs.items():
            if key.startswith('opt'):
                optionals.append(
                    [self._subst_graph_handles(value, inputs), None])
            elif key.startswith('owhere'):
                # only entries created from an 'opt...' keyword are mutable
                # lists; report a missing partner clause as a regular error
                # instead of failing with an IndexError or TypeError:
                if not optionals or not isinstance(optionals[-1], list):
                    raise KGTKException(
                        'Optional where clause without preceding optional clause: %s' % key)
                optionals[-1][1] = value
            else:
                raise KGTKException('Unexpected keyword argument: %s' % key)

        store = self.api.get_sql_store()
        loglevel = cmd.get(
            'loglevel',
            loglevel if loglevel is not None else self.api.loglevel)
        index = cmd.get('index_mode', index or self.api.index_mode)
        # since we are loading results into memory, we are not using unlimited as the default:
        limit = cmd.get('limit',
                        limit if limit is not None else self.api.max_results)
        # -1 forces unlimited results:
        if limit in (-1, '-1'):
            limit = None
        maxcache = cmd.get(
            'maxcache',
            maxcache if maxcache is not None else self.api.max_cache_size)

        self.kgtk_query = kyquery.KgtkQuery(
            inputs,
            store,
            loglevel=loglevel,
            index=index,
            query=cmd.get('query', query),
            match=self._subst_graph_handles(cmd.get('match', match), inputs),
            where=cmd.get('where', where),
            optionals=optionals,
            with_=cmd.get('with', (with_, wwhere)),
            ret=cmd.get('return', ret),
            order=cmd.get('order', order),
            skip=cmd.get('skip', skip),
            limit=limit,
            parameters=cmd.get('parameters', parameters),
            force=cmd.get('force', force))

        self.kgtk_query.defer_params = True
        state = self.kgtk_query.translate_to_sql()
        self.sql = state.get_sql()
        self.parameters = state.get_parameters()
        self.kgtk_query.ensure_relevant_indexes(state)
        # create memoizable execution wrapper:
        self.exec_wrapper = lambda q, p, f: q._exec(p, f)
        if maxcache > 0:
            self.exec_wrapper = lru_cache(maxsize=maxcache)(self.exec_wrapper)
        if name is not None:
            self.api.cached_queries[name] = self
        self.timestamp = self.api.timestamp
        return self