def run(input_files: KGTKFiles, **options):
    """Run Kypher query according to the provided command-line arguments.

    Parses inputs/outputs and query options, opens (or creates) the SQLite
    graph cache, translates and executes the Kypher query, and streams the
    result rows as tab-separated KGTK output.  Raises KGTKException on any
    user-level error.
    """
    try:
        import_modules()
        debug = options.get('_debug', False)
        expert = options.get('_expert', False)
        # loglevel: 0 = quiet, 1 = debug, 2 = debug + expert detail
        loglevel = 1 if debug else 0
        if debug and expert:
            loglevel = 2

        # normalize path objects to strings:
        inputs = [str(f) for f in KGTKArgumentParser.get_input_file_list(input_files)]
        if len(inputs) == 0:
            raise KGTKException('At least one input needs to be supplied')

        output = options.get('output')
        if output == '-':
            output = sys.stdout
        if isinstance(output, str):
            output = sqlstore.open_to_write(output, mode='wt')

        parameters = parse_query_parameters(regular=options.get('regular_paras') or [],
                                            string=options.get('string_paras') or [],
                                            lqstring=options.get('lqstring_paras') or [])

        # initialize before the try so the finally-clause cannot hit a
        # NameError if SqliteStore construction itself fails:
        store = None
        try:
            graph_cache = options.get('graph_cache_file')
            store = sqlstore.SqliteStore(graph_cache,
                                         create=not os.path.exists(graph_cache),
                                         loglevel=loglevel)

            query = kyquery.KgtkQuery(inputs, store, loglevel=loglevel,
                                      options=options.get('input_file_options'),
                                      query=options.get('query'),
                                      match=options.get('match'),
                                      where=options.get('where'),
                                      ret=options.get('return_'),
                                      order=options.get('order'),
                                      skip=options.get('skip'),
                                      limit=options.get('limit'),
                                      parameters=parameters,
                                      index=options.get('index'))

            explain = options.get('explain')
            if explain is not None:
                result = query.explain(explain)
                output.write(result)
            else:
                result = query.execute()
                # we are forcing \n line endings here instead of \r\n, since those
                # can be re/imported efficiently with the new SQLite import command;
                # we force `escapechar' back to None to avoid generation of double
                # backslashes as in 'Buffalo \'66', which in turn will now raise errors
                # if separators in fields are encountered (which seems what we want):
                csvwriter = csv.writer(output, dialect=None, delimiter='\t',
                                       quoting=csv.QUOTE_NONE, quotechar=None,
                                       lineterminator='\n',
                                       escapechar=None)
                if not options.get('no_header'):
                    csvwriter.writerow(query.result_header)
                csvwriter.writerows(result)
            output.flush()
        finally:
            if store is not None:
                store.close()
            if output is not None and output is not sys.stdout:
                output.close()

    except sh.SignalException_SIGPIPE:
        # hack to work around Python3 issue when stdout is gone when we try to report an exception;
        # without this we get an ugly 'Exception ignored...' msg when we quit with head or a pager:
        sys.stdout = os.fdopen(1)
    except KGTKException:
        # re-raise unchanged, preserving the original traceback:
        raise
    except Exception as e:
        raise KGTKException(str(e) + '\n')
def _define(self, inputs=None, doc=None, name=None, maxcache=None,
            query=None, match='()', where=None,
            opt=None, owhere=None, opt2=None, owhere2=None,
            with_='*', wwhere=None,
            ret='*', order=None, skip=None, limit=None,
            parameters=None, force=False, index=None, loglevel=None,
            **kwargs):
    """Internal constructor which generates a cached query translation and
    a LRU-cachable results structure (if requested).  See 'KypherApi.get_query'
    for documentation of arguments.
    """
    if self.kgtk_query is not None:
        raise KGTKException('query has already been defined')
    # avoid the shared-mutable-default pitfall of 'parameters={}';
    # callers passing None now get a fresh empty dict (backward-compatible):
    if parameters is None:
        parameters = {}
    inputs = kyquery.listify(inputs) or self.api.get_all_inputs()
    # register any previously unseen inputs and map all to their handles:
    norm_inputs = []
    for inp in inputs:
        if self.api.get_input_info(inp) is None:
            self.api.add_input(inp)
        norm_inputs.append(self.api.get_input(inp))
    inputs = norm_inputs
    self.docstring = doc
    optionals = []
    if opt:
        optionals.append((self._subst_graph_handles(opt, inputs), owhere))
    if opt2:
        optionals.append((self._subst_graph_handles(opt2, inputs), owhere2))
    # kwargs is an ordered dict, so the actual suffixes do not matter:
    for key, value in kwargs.items():
        if key.startswith('opt'):
            optionals.append([self._subst_graph_handles(value, inputs), None])
        elif key.startswith('owhere'):
            # attaches to the most recently added optional clause:
            optionals[-1][1] = value
        else:
            raise KGTKException('Unexpected keyword argument: %s' % key)
    store = self.api.get_sql_store()
    if loglevel is None:
        loglevel = self.api.loglevel
    if index is None:
        index = self.api.index_mode
    # since we are loading results into memory, we are not using unlimited as the default:
    if limit is None:
        limit = self.api.max_results
    # -1 forces unlimited results:
    elif limit == -1:
        limit = None
    if maxcache is None:
        maxcache = self.api.max_cache_size
    self.kgtk_query = kyquery.KgtkQuery(
        inputs, store, loglevel=loglevel, index=index,
        query=query,
        match=self._subst_graph_handles(match, inputs),
        where=where,
        optionals=optionals,
        with_=(with_, wwhere),
        ret=ret, order=order, skip=skip, limit=limit,
        parameters=parameters, force=force)
    self.kgtk_query.defer_params = True
    state = self.kgtk_query.translate_to_sql()
    self.sql = state.get_sql()
    self.parameters = state.get_parameters()
    self.kgtk_query.ensure_relevant_indexes(state)
    # create memoizable execution wrapper:
    self.exec_wrapper = lambda q, p, f: q._exec(p, f)
    if maxcache > 0:
        self.exec_wrapper = lru_cache(maxsize=maxcache)(self.exec_wrapper)
    if name is not None:
        self.api.cached_queries[name] = self
    self.timestamp = self.api.timestamp
    return self
def run(input_files: KGTKFiles, **options):
    """Run Kypher query according to the provided command-line arguments.

    Preprocesses all options, opens (or creates) the SQLite graph cache,
    optionally just describes the cache ('--show-cache'), otherwise
    translates and executes the Kypher query and streams the result rows
    as tab-separated KGTK output.  Raises KGTKException on user-level errors.
    """
    try:
        options = preprocess_query_options(input_files=input_files, **options)
        show_cache = options.get('show_cache')
        inputs = options.get('input_files')
        if len(inputs) == 0 and not show_cache:
            raise KGTKException('At least one input needs to be supplied')
        output = options['output']
        loglevel = options.get('loglevel')

        # initialize before the try so the finally-clause cannot hit a
        # NameError if SqliteStore construction itself fails:
        store = None
        try:
            graph_cache = options.get('graph_cache_file')
            store = sqlstore.SqliteStore(graph_cache,
                                         create=not os.path.exists(graph_cache),
                                         loglevel=loglevel,
                                         readonly=options.get('readonly'))
            if show_cache:
                store.describe_meta_tables(out=sys.stdout)
                return

            imports = options.get('import')
            if imports:
                # SECURITY NOTE: this executes arbitrary import statements taken
                # from the command line in the sqlstore module namespace; fine
                # for a trusted CLI, but never feed untrusted input here:
                exec('import ' + imports, sqlstore.__dict__)

            query = kyquery.KgtkQuery(inputs, store, loglevel=loglevel,
                                      options=options.get('input_file_options'),
                                      query=options.get('query'),
                                      match=options.get('match'),
                                      where=options.get('where'),
                                      optionals=options.get('optionals'),
                                      with_=options.get('with'),
                                      ret=options.get('return'),
                                      order=options.get('order'),
                                      skip=options.get('skip'),
                                      limit=options.get('limit'),
                                      parameters=options.get('parameters'),
                                      index=options.get('index_mode'),
                                      force=options.get('force'))

            explain = options.get('explain')
            if explain is not None:
                result = query.explain(explain)
                output.write(result)
            else:
                result = query.execute()
                # we are forcing \n line endings here instead of \r\n, since those
                # can be re/imported efficiently with the new SQLite import command;
                # we force `escapechar' back to None to avoid generation of double
                # backslashes as in 'Buffalo \'66', which in turn will now raise errors
                # if separators in fields are encountered (which seems what we want):
                csvwriter = csv.writer(output, dialect=None, delimiter='\t',
                                       quoting=csv.QUOTE_NONE, quotechar=None,
                                       lineterminator='\n',
                                       escapechar=None)
                if not options.get('no_header'):
                    csvwriter.writerow(query.result_header)
                csvwriter.writerows(result)
            output.flush()
        finally:
            if store is not None:
                store.close()
            if output is not None and output is not sys.stdout:
                output.close()

    except sh.SignalException_SIGPIPE:
        # hack to work around Python3 issue when stdout is gone when we try to report an exception;
        # without this we get an ugly 'Exception ignored...' msg when we quit with head or a pager:
        sys.stdout = os.fdopen(1)
    except KGTKException:
        # re-raise unchanged, preserving the original traceback:
        raise
    except Exception as e:
        raise KGTKException(str(e) + '\n')
def _define(self, inputs=None, doc=None, name=None, maxcache=None,
            query=None, match='()', where=None,
            opt=None, owhere=None, opt2=None, owhere2=None,
            with_='*', wwhere=None,
            ret='*', order=None, skip=None, limit=None,
            parameters=None, force=False, index=None, loglevel=None,
            cmd=None, **kwargs):
    """Internal constructor which generates a cached query translation and
    a LRU-cachable results structure (if requested).  See 'KypherApi.get_query'
    for documentation of arguments.
    """
    if self.kgtk_query is not None:
        raise KGTKException('query has already been defined')
    # avoid the shared-mutable-default pitfall of 'parameters={}';
    # callers passing None now get a fresh empty dict (backward-compatible):
    if parameters is None:
        parameters = {}
    # any options specified in 'cmd' dominate others or get merged with them:
    cmd = parse_query_command(cmd)
    if name is not None and cmd.get('name', name) != name:
        raise KGTKException('multiply defined mismatched query names')
    name = cmd.get('name', name)
    inputs = cmd.get('input_files') or kyquery.listify(inputs) or self.api.get_all_inputs()
    # register any previously unseen inputs and map all to their handles:
    norm_inputs = []
    for inp in inputs:
        if self.api.get_input_info(inp) is None:
            self.api.add_input(inp)
        norm_inputs.append(self.api.get_input(inp))
    inputs = norm_inputs
    self.docstring = cmd.get('docstring') or doc
    # 'cmd'-supplied optionals come first, then the keyword-argument ones:
    optionals = [(self._subst_graph_handles(o, inputs), w)
                 for o, w in cmd.get('optionals', [])]
    if opt:
        optionals.append((self._subst_graph_handles(opt, inputs), owhere))
    if opt2:
        optionals.append((self._subst_graph_handles(opt2, inputs), owhere2))
    # kwargs is an ordered dict, so the actual suffixes do not matter:
    for key, value in kwargs.items():
        if key.startswith('opt'):
            optionals.append([self._subst_graph_handles(value, inputs), None])
        elif key.startswith('owhere'):
            # attaches to the most recently added optional clause:
            optionals[-1][1] = value
        else:
            raise KGTKException('Unexpected keyword argument: %s' % key)
    store = self.api.get_sql_store()
    loglevel = cmd.get('loglevel',
                       loglevel if loglevel is not None else self.api.loglevel)
    index = cmd.get('index_mode', index or self.api.index_mode)
    # since we are loading results into memory, we are not using unlimited as the default:
    limit = cmd.get('limit', limit if limit is not None else self.api.max_results)
    # -1 forces unlimited results:
    if limit in (-1, '-1'):
        limit = None
    maxcache = cmd.get('maxcache',
                       maxcache if maxcache is not None else self.api.max_cache_size)
    self.kgtk_query = kyquery.KgtkQuery(
        inputs, store, loglevel=loglevel, index=index,
        query=cmd.get('query', query),
        match=self._subst_graph_handles(cmd.get('match', match), inputs),
        where=cmd.get('where', where),
        optionals=optionals,
        with_=cmd.get('with', (with_, wwhere)),
        ret=cmd.get('return', ret),
        order=cmd.get('order', order),
        skip=cmd.get('skip', skip),
        limit=limit,
        parameters=cmd.get('parameters', parameters),
        force=cmd.get('force', force))
    self.kgtk_query.defer_params = True
    state = self.kgtk_query.translate_to_sql()
    self.sql = state.get_sql()
    self.parameters = state.get_parameters()
    self.kgtk_query.ensure_relevant_indexes(state)
    # create memoizable execution wrapper:
    self.exec_wrapper = lambda q, p, f: q._exec(p, f)
    if maxcache > 0:
        self.exec_wrapper = lru_cache(maxsize=maxcache)(self.exec_wrapper)
    if name is not None:
        self.api.cached_queries[name] = self
    self.timestamp = self.api.timestamp
    return self