def load_property_labels_file(input_files: typing.List[str]):
    """Build a ``{property id: label}`` mapping from tab-separated label files.

    Every file must begin with a header row.  If the header has both a
    ``predicate`` and a ``label`` column those are used; if it only has a
    ``label`` column, the first column is taken as the property id.

    A label containing ``@en`` has its single quotes removed, is cut at the
    first ``@`` and always overwrites an earlier entry for the same id.  Any
    other label is stored only when the id has no entry yet.

    Raises:
        KGTKException: if a header has fewer than two columns or no
            ``label`` column.
    """
    labels_dict: typing.MutableMapping[str, str] = {}
    for each_file in input_files:
        # Each file carries its own header, so the column layout is detected
        # per file.  Reusing the first file's header would ingest the header
        # row of every later file as a data row.
        column_references: typing.Optional[typing.Dict[str, int]] = None
        with open(each_file, "r") as f:
            for each_line in f:
                fields: typing.List[str] = each_line.replace("\n", "").split("\t")
                if column_references is None:
                    headers = fields
                    if len(headers) < 2:
                        raise KGTKException(
                            "No enough columns found on given input file. Only {} columns given but at least 2 needed.".format(
                                len(headers)))
                    elif "predicate" in headers and "label" in headers:
                        column_references = {"predicate": headers.index("predicate"),
                                             "label": headers.index("label")}
                    elif "label" in headers:
                        column_references = {"predicate": 0,
                                             "label": headers.index("label")}
                    else:
                        raise KGTKException("Can't determine which column is label column for label file!")
                    continue

                node_id: str = fields[column_references["predicate"]]
                node_label: str = fields[column_references["label"]]
                if "@en" in node_label:
                    # English labels win over whatever was stored before.
                    node_label = node_label.replace("'", "").split("@")[0]
                    labels_dict[node_id] = node_label
                if node_id not in labels_dict:
                    labels_dict[node_id] = node_label
    return labels_dict
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        key_column_names: typing.List[str],

        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,

        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Run ``KgtkExpand`` on one input file and return 0 on success.

    The input and output paths are resolved through ``KGTKArgumentParser``.
    Diagnostics go to stdout when ``errors_to_stdout`` is true, otherwise to
    stderr (``errors_to_stderr`` is accepted but not consulted here).
    ``kwargs`` feeds both the reader and the value option builders.

    Raises:
        KGTKException: wrapping ``SystemExit`` or any other exception raised
            while constructing or running the processor.
    """
    # import modules locally
    from pathlib import Path
    import sys
    from kgtk.exceptions import KGTKException
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.reshape.kgtkexpand import KgtkExpand
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    source_path: Path = KGTKArgumentParser.get_input_file(input_file)
    target_path: Path = KGTKArgumentParser.get_output_file(output_file)

    # Diagnostics default to stderr.
    diagnostics: typing.TextIO
    if errors_to_stdout:
        diagnostics = sys.stdout
    else:
        diagnostics = sys.stderr

    reader_opts: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_opts: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    if show_options:
        # Echo the effective options for debugging and documentation.
        option_lines = (
            "--input-file=%s" % str(source_path),
            "--output-file=%s" % str(target_path),
            "--columns=%s" % " ".join(key_column_names),
        )
        for option_line in option_lines:
            print(option_line, file=diagnostics)
        reader_opts.show(out=diagnostics)
        value_opts.show(out=diagnostics)
        print("=======", file=diagnostics, flush=True)

    try:
        KgtkExpand(
            input_file_path=source_path,
            key_column_names=key_column_names,
            output_file_path=target_path,
            reader_options=reader_opts,
            value_options=value_opts,
            error_file=diagnostics,
            verbose=verbose,
            very_verbose=very_verbose,
        ).process()
    except SystemExit:
        raise KGTKException("Exit requested")
    except Exception as err:
        raise KGTKException(str(err))
    return 0
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        filter_column_names: typing.List[str],
        all_are: bool = False,
        only_count: bool = False,

        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,

        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Run ``KgtkIfEmpty`` (with ``notempty=False``) and return 0 on success.

    The input and output paths are resolved through ``KGTKArgumentParser``.
    Diagnostics go to stdout when ``errors_to_stdout`` is true, otherwise to
    stderr (``errors_to_stderr`` is accepted but not consulted here).

    Raises:
        KGTKException: wrapping ``SystemExit`` or any other exception raised
            while constructing or running the processor.
    """
    # import modules locally
    from kgtk.exceptions import KGTKException

    source_path: Path = KGTKArgumentParser.get_input_file(input_file)
    target_path: Path = KGTKArgumentParser.get_output_file(output_file)

    # Diagnostics default to stderr.
    diagnostics: typing.TextIO
    if errors_to_stdout:
        diagnostics = sys.stdout
    else:
        diagnostics = sys.stderr

    reader_opts: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_opts: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    if show_options:
        # Echo the effective options for debugging and documentation.
        option_lines = (
            "--input-file=%s" % str(source_path),
            "--output-file=%s" % str(target_path),
            "--columns=%s" % " ".join(filter_column_names),
            "--count=%s" % str(only_count),
            "--all=%s" % str(all_are),
        )
        for option_line in option_lines:
            print(option_line, file=diagnostics)
        reader_opts.show(out=diagnostics)
        value_opts.show(out=diagnostics)
        print("=======", file=diagnostics, flush=True)

    try:
        KgtkIfEmpty(
            input_file_path=source_path,
            filter_column_names=filter_column_names,
            output_file_path=target_path,
            all_are=all_are,
            notempty=False,
            only_count=only_count,
            reader_options=reader_opts,
            value_options=value_opts,
            error_file=diagnostics,
            verbose=verbose,
            very_verbose=very_verbose,
        ).process()
    except SystemExit:
        raise KGTKException("Exit requested")
    except Exception as err:
        raise KGTKException(str(err))
    return 0
def parse_query_command(command):
    """Turn a query 'command' into a normalized options dictionary.

    'command' is either one string (split with shell rules) or an iterable
    of arguments (each converted with 'str').  The particles 'kgtk' and
    'query' are tolerated so that top-level KGTK query commands can be
    passed through unchanged; any other unrecognized argument is an error,
    as is an 'output' other than None or '-'.  An empty or missing command
    yields an empty dictionary.  Options whose value is None are dropped
    from the result.
    """
    if isinstance(command, str):
        argv = shlex.split(command)
    elif command is None:
        argv = None
    else:
        argv = [str(item) for item in command]
    if not argv:
        return {}

    namespace, leftovers = COMMAND_ARGUMENT_PARSER.parse_known_args(argv)
    for leftover in leftovers:
        if leftover not in ('kgtk', 'query'):
            raise KGTKException(f'illegal query API option: {leftover}')

    arg_map = vars(namespace)
    if arg_map.get('output') not in (None, '-'):
        raise KGTKException('output specification not supported in query API')

    # remember what the caller asked for before preprocessing applies defaults:
    requested_loglevel = arg_map.get('loglevel')
    options = cliquery.preprocess_query_options(**arg_map)
    if requested_loglevel is not None:
        # an explicit loglevel always wins:
        options['loglevel'] = requested_loglevel
    elif options.get('loglevel') == 0:
        # undo the default in favor of the API default:
        options['loglevel'] = None

    # strip None values so callers can use 'get' with defaults:
    normalized = {}
    for name, value in options.items():
        if value is not None:
            normalized[name] = value
    return normalized
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        output_format: str,

        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,

        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Run ``KgtkCat`` over a single input file and return 0 on success.

    The input and output paths are resolved through ``KGTKArgumentParser``
    and the input is handed to ``KgtkCat`` as a one-element list.
    Diagnostics go to stdout when ``errors_to_stdout`` is true, otherwise to
    stderr (``errors_to_stderr`` is accepted but not consulted here).

    Raises:
        KGTKException: wrapping ``SystemExit`` or any other exception raised
            while constructing or running the processor.
    """
    # import modules locally
    from kgtk.exceptions import KGTKException

    source_path: Path = KGTKArgumentParser.get_input_file(input_file)
    target_path: Path = KGTKArgumentParser.get_output_file(output_file)

    # Diagnostics default to stderr.
    diagnostics: typing.TextIO
    if errors_to_stdout:
        diagnostics = sys.stdout
    else:
        diagnostics = sys.stderr

    # TODO: check that at most one input file is stdin?

    reader_opts: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_opts: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    if show_options:
        # Echo the effective options for debugging and documentation.
        for option_line in ("--input-file=%s" % str(source_path),
                            "--output-file=%s" % str(target_path)):
            print(option_line, file=diagnostics, flush=True)
        reader_opts.show(out=diagnostics)
        value_opts.show(out=diagnostics)
        print("=======", file=diagnostics, flush=True)

    try:
        KgtkCat(input_file_paths=[source_path],
                output_path=target_path,
                output_format=output_format,
                reader_options=reader_opts,
                value_options=value_opts,
                error_file=diagnostics,
                verbose=verbose,
                very_verbose=very_verbose).process()
    except SystemExit:
        raise KGTKException("Exit requested")
    except Exception as err:
        raise KGTKException(str(err))
    return 0
def run(
        input_kgtk_file: typing.Optional[Path],
        output_kgtk_file: typing.Optional[Path],
        key_column_names: typing.List[str],

        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,

        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Run ``KgtkExpand`` on already-resolved paths and return 0 on success.

    Unlike the ``KGTKFiles`` variants, the paths are passed straight through
    to ``KgtkExpand``; ``None`` is displayed as ``-`` when options are shown.
    Diagnostics go to stdout when ``errors_to_stdout`` is true, otherwise to
    stderr (``errors_to_stderr`` is accepted but not consulted here).

    Raises:
        KGTKException: wrapping ``SystemExit`` or any other exception raised
            while constructing or running the processor.
    """
    # import modules locally
    from kgtk.exceptions import KGTKException

    # Diagnostics default to stderr.
    diagnostics: typing.TextIO
    if errors_to_stdout:
        diagnostics = sys.stdout
    else:
        diagnostics = sys.stderr

    reader_opts: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
    value_opts: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

    if show_options:
        # Echo the effective options for debugging and documentation.
        shown_input = "-" if input_kgtk_file is None else str(input_kgtk_file)
        shown_output = "-" if output_kgtk_file is None else str(output_kgtk_file)
        print("input: %s" % shown_input, file=diagnostics)
        print("--columns=%s" % " ".join(key_column_names), file=diagnostics)
        print("--output-file=%s" % shown_output, file=diagnostics)
        reader_opts.show(out=diagnostics)
        value_opts.show(out=diagnostics)
        print("=======", file=diagnostics, flush=True)

    try:
        KgtkExpand(
            input_file_path=input_kgtk_file,
            key_column_names=key_column_names,
            output_file_path=output_kgtk_file,
            reader_options=reader_opts,
            value_options=value_opts,
            error_file=diagnostics,
            verbose=verbose,
            very_verbose=very_verbose,
        ).process()
    except SystemExit:
        raise KGTKException("Exit requested")
    except Exception as err:
        raise KGTKException(str(err))
    return 0
def run(
        kgtk_browser_host: str = '0.0.0.0',
        kgtk_browser_port: str = '5000',
        kgtk_browser_config: str = 'kgtk_browser_config.py',
        kgtk_browser_app: str = 'kgtk_browser_app.py',

        errors_to_stdout: bool = False,
        errors_to_stderr: bool = True,
        show_options: bool = False,
        verbose: bool = False,
        very_verbose: bool = False,

        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Start the browser Flask app and open it in the default web browser.

    Exports ``FLASK_APP`` and ``KGTK_BROWSER_CONFIG`` into the environment,
    schedules the web browser to open ``http://<host>:<port>/browser`` after
    2.5 seconds, then runs ``flask run`` through the shell.  Always returns 0
    once the shell command comes back; its exit status is not inspected.

    Raises:
        KGTKException: wrapping ``SystemExit`` or any other exception.
    """
    # import modules locally
    from pathlib import Path
    import simplejson as json
    import webbrowser
    import threading
    import os, sys
    import typing
    from kgtk.exceptions import KGTKException

    # Select where to send error messages, defaulting to stderr.
    error_file: typing.TextIO = sys.stderr
    if errors_to_stdout:
        error_file = sys.stdout

    try:
        # Flask picks the app and its configuration up from the environment.
        os.environ["FLASK_APP"] = kgtk_browser_app
        os.environ["KGTK_BROWSER_CONFIG"] = kgtk_browser_config

        # Give the server a moment to come up before opening the page.
        browser_url = "http://{}:{}/browser".format(kgtk_browser_host, kgtk_browser_port)
        opener = threading.Timer(2.5, webbrowser.open, args=(browser_url,))
        opener.start()

        # NOTE(review): host and port are interpolated into a shell command
        # line unescaped; fine for trusted CLI input, unsafe otherwise.
        flask_command = "flask run --host {} --port {}".format(
            kgtk_browser_host,
            kgtk_browser_port,
        )
        os.system(flask_command)
    except SystemExit:
        raise KGTKException("Exit requested")
    except Exception as err:
        raise KGTKException(str(err))
    return 0
def run(input=None, output=None, columns='1', reverse=False, space=False, speed=False, extra='', tsv=False, csv=False, _dt=None):
    """Run sort according to the provided command-line arguments.

    Returns the exit code of the executed shell pipeline, or None when the
    pipeline was cut short by SIGPIPE.  Any other failure is re-raised as a
    KGTKException.
    """
    try:
        # CSV mode needs an explicit request and must not be overridden by --tsv:
        wants_csv = not tsv and (csv or _dt == 'csv')
        colsep = ',' if wants_csv else '\t'

        sort_options = extra
        if reverse:
            sort_options += ' -r'
        if space:
            sort_options += ' ' + space_config
        elif speed:
            sort_options += ' ' + speed_config

        commands = build_command(input=input, output=output, columns=columns, colsep=colsep, options=sort_options)
        outcome = zcat.run_sh_commands(commands)
        return outcome.exit_code
    except sh.SignalException_SIGPIPE:
        # hack to work around Python3 issue when stdout is gone when we try to report an exception;
        # without this we get an ugly 'Exception ignored...' msg when we quit with head or a pager:
        sys.stdout = os.fdopen(1)
    except Exception as err:
        details = type(err).__module__ + '.' + str(err)
        raise KGTKException('INTERNAL ERROR: ' + details + '\n')
def run(labels: str, aliases: str, descriptions: str, property_file: str, n: int, truthy: bool, warning: bool,
        use_id: bool, log_path: str, prop_declaration: bool, prefix_path: str, input_file: KGTKFiles,
        output_file: str, error_action: str):
    """Configure a ``TripleGenerator`` from the arguments and run it.

    The generator is constructed outside the ``try`` block, so construction
    errors propagate unchanged; only failures of ``process()`` are wrapped.

    Raises:
        KGTKException: wrapping any exception raised by ``process()``.
    """
    # import modules locally
    from kgtk.generator import TripleGenerator
    from kgtk.exceptions import KGTKException

    generator_settings = dict(
        prop_file=property_file,
        label_set=labels,
        alias_set=aliases,
        description_set=descriptions,
        n=n,
        warning=warning,
        truthy=truthy,
        use_id=use_id,
        dest_fp=output_file,
        log_path=log_path,
        prop_declaration=prop_declaration,
        prefix_path=prefix_path,
        input_file=input_file,
        error_action=error_action,
    )
    triple_generator = TripleGenerator(**generator_settings)
    try:
        triple_generator.process()
    except Exception as err:
        raise KGTKException(err)
def build_sort_key_spec(header, columns, colsep='\t'):
    """Given a KGTK file `header' line and a user-provided `columns' spec,
    generate a sequence of Unix sort key definitions representative of those
    columns.  For example, columns=subject,object will translate into
    '-k 1,1 -k 3,3'.

    Columns can be specified by the names used in the file header line, as
    1-based positions, or through the pre-defined positions of reserved names
    such as `subject', etc.  Columns found in the header will override any
    predefined positions.  Empty entries in the spec are ignored.

    Raises KGTKException for a column that cannot be resolved in any of
    these ways.
    """
    import re
    requested = [c.strip() for c in re.split(column_spec_split_regex, columns.strip())]
    header_names = [c.strip() for c in header.split(colsep)]
    keys = []
    for col in requested:
        if col == '':
            continue
        if col in header_names:
            index = header_names.index(col) + 1
        else:
            try:
                index = int(col)
            except ValueError:
                # not a position either; only a conversion failure may fall
                # through to the reserved names (a bare `except' here would
                # also swallow KeyboardInterrupt and SystemExit):
                index = reserved_name_columns.get(col)
        if index is None:
            raise KGTKException('Unknown column: ' + col)
        keys.append('-k %d,%d' % (index, index))
    # special whitespace at the end is used by `wait_for_key_spec' below:
    return ' '.join(keys) + ' \t'
def run(input_file: KGTKFiles,
        output_file: KGTKFiles,
        no_header: bool = False,
        properties: str = '',
        undirected: bool = False,
        strong: bool = False,
        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
        ) -> int:
    """Run ``ConnectedComponents`` on the given files and return 0 on success.

    The input and output paths are resolved through ``KGTKArgumentParser``.
    The processor is constructed outside the ``try`` block, so only failures
    of ``process()`` are wrapped.

    Raises:
        KGTKException: wrapping any exception raised by ``process()``.
    """
    from kgtk.gt.connected_components import ConnectedComponents
    from kgtk.exceptions import KGTKException

    source_path: Path = KGTKArgumentParser.get_input_file(input_file)
    target_path: Path = KGTKArgumentParser.get_output_file(output_file)

    components: ConnectedComponents = ConnectedComponents(
        input_file_path=source_path,
        output_file_path=target_path,
        no_header=no_header,
        properties=properties,
        undirected=undirected,
        strong=strong,
    )

    try:
        components.process()
    except Exception as err:
        raise KGTKException(str(err))
    return 0
def execute_to_file(self, file=sys.stdout, noheader=False, **params):
    """Execute this query with the given 'params' and write the result to
    the file or file-like object 'file' in KGTK format.  Output a header
    unless 'noheader' is true.

    If 'file' is a string it is opened for writing and closed again before
    returning; a file-like object is left open for the caller.  Raises
    KGTKException if 'file' is neither a string nor has a 'write' method.
    """
    self.refresh()
    if hasattr(self.exec_wrapper, 'cache_clear'):
        # if this is a re-call of a caching query, ensure it is cleared,
        # since the result iterator was used up in the previous call:
        self.exec_wrapper.cache_clear()
    parameters = self._subst_params(self.parameters, params)

    # open before entering the 'try' so that a failing 'open' surfaces as
    # its own error instead of an unbound 'out' in the cleanup clause:
    owns_file = isinstance(file, str)
    out = open(file, 'w') if owns_file else file
    try:
        if not hasattr(out, 'write'):
            raise KGTKException('expected file or file-like object')
        result = self.exec_wrapper(self, parameters, iter)
        csvwriter = csv.writer(out, dialect=None, delimiter='\t',
                               quoting=csv.QUOTE_NONE, quotechar=None,
                               lineterminator='\n', escapechar=None)
        if not noheader:
            csvwriter.writerow(self.get_result_header())
        csvwriter.writerows(result)
    finally:
        # only close what we opened ourselves:
        if owns_file:
            out.close()
def _exec(self, parameters, fmt):
    """Internal query execution wrapper that can easily be memoized.

    Executes 'self.sql' with 'parameters' on the query's store, records the
    result header on the underlying query object if it has none yet, and
    returns the result shaped according to 'fmt': None or 'tuple' gives a
    tuple of rows, 'list' a list, 'iter' the raw result object, and
    'df'/'dataframe'/'DataFrame' a pandas DataFrame.  'fmt' may be given as
    a name or as an object with a '__name__' (e.g. the 'list' type).
    Raises KGTKException for any other format.
    """
    # TO DO: abstract some of this better in KgtkQuery API
    query = self.kgtk_query
    rows = query.store.execute(self.sql, parameters)
    if query.result_header is None:
        query.result_header = [query.unalias_column_name(col[0]) for col in rows.description]

    if fmt is None:
        # materialize so the result can be reused if we memoize:
        return tuple(rows)

    # allow types and their names:
    fmt_name = getattr(fmt, '__name__', None) or str(fmt)
    if fmt_name == 'iter':
        return rows
    if fmt_name == 'tuple':
        return tuple(rows)
    if fmt_name == 'list':
        return list(rows)
    if fmt_name in ('df', 'dataframe', 'DataFrame'):
        if not _have_pandas:
            _import_pandas()
        return pd.DataFrame(rows, columns=query.result_header)
    # TO DO: consider supporting namedtuple and/or sqlite3.Row as row_factory types
    #        (for sqlite3.Row we have the issue that aliases become keys())
    raise KGTKException('unsupported query result format: %s' % fmt_name)
def calculate_distance(a, b):
    """Return the Euclidean distance between the equal-length vectors a and b.

    Raises:
        KGTKException: if the two vectors differ in length.
    """
    if len(a) != len(b):
        raise KGTKException("Vector dimension are different!")
    squared_total = 0
    for left, right in zip(a, b):
        delta = left - right
        squared_total += delta ** 2
    return squared_total ** 0.5
def run(filename, directed, log_file, output):
    """Load a TSV edge file into graph_tool and log basic statistics.

    The header of ``filename`` is inspected to find the subject column
    (``node1``/``subject``), the object column (``node2``/``object``/
    ``value``) and the predicate column name (``property``/``predicate``/
    ``label``); every column other than subject and object becomes an edge
    property.  Node and edge counts plus the top relations are written to
    ``log_file``.  When ``output`` is truthy the graph is saved there.

    Raises:
        KGTKException: wrapping any exception raised along the way.
    """
    from kgtk.exceptions import KGTKException

    def infer_index(h, options=()):
        # position of the first candidate name present in the header, else -1
        for o in options:
            if o in h:
                return h.index(o)
        return -1

    def infer_predicate(h, options=()):
        # first candidate name present in the header, else ''
        for o in options:
            if o in h:
                return o
        return ''

    try:
        # import modules locally
        from graph_tool import load_graph_from_csv
        from graph_tool import centrality
        import kgtk.gt.analysis_utils as gtanalysis

        with open(filename, 'r') as f:
            # Strip the line terminator first: otherwise the last column
            # name keeps its newline and never matches a candidate name.
            header = next(f).rstrip('\r\n').split('\t')

        subj_index = infer_index(header, options=['node1', 'subject'])
        obj_index = infer_index(header, options=['node2', 'object', 'value'])
        predicate = infer_predicate(header, options=['property', 'predicate', 'label'])

        # every remaining column is loaded as an edge property
        p = [header_col for i, header_col in enumerate(header)
             if i not in (subj_index, obj_index)]

        with open(log_file, 'w') as writer:
            writer.write('loading the TSV graph now ...\n')
            G2 = load_graph_from_csv(filename,
                                     skip_first=True,
                                     directed=directed,
                                     hashed=True,
                                     ecols=[subj_index, obj_index],
                                     eprop_names=p,
                                     csv_options={'delimiter': '\t'})

            writer.write('graph loaded! It has %d nodes and %d edges\n' % (G2.num_vertices(), G2.num_edges()))
            writer.write('\n###Top relations:\n')
            for rel, freq in gtanalysis.get_topN_relations(G2, pred_property=predicate):
                writer.write('%s\t%d\n' % (rel, freq))

            if output:
                writer.write('now saving the graph to %s\n' % output)
                G2.save(output)
    except Exception as e:
        raise KGTKException('Error: ' + str(e))
def wait_for_key_spec():
    """Poll the sort key spec file until its content ends with a tab.

    The file named by sort_env['KGTK_SORT_KEY_SPEC'] is re-read up to 100
    times with a 0.05 second pause after each unsuccessful read (about 5
    seconds in total).  Raises KGTKException if the marker never shows up.
    """
    import time
    remaining_attempts = 100
    while remaining_attempts > 0:
        with open(sort_env['KGTK_SORT_KEY_SPEC'], 'r') as inp:
            spec_complete = inp.read().endswith('\t')
        if spec_complete:
            return
        time.sleep(0.05)
        remaining_attempts -= 1
    raise KGTKException('INTERNAL ERROR: failed to communicate sort key')
def run(input_file, output_file, columns='1', reverse=False, space=False, speed=False, extra='', tsv=False, csv=False, _dt=None, naptime=1):
    """Run sort according to the provided command-line arguments.

    Sleeps for 'naptime' seconds first, then resolves the input and output
    through KGTKArgumentParser (an output of "-" is passed on as None).
    Returns the exit code of the executed shell pipeline, or None when the
    pipeline was cut short by SIGPIPE.  Any other failure inside the sort
    itself is re-raised as a KGTKException.
    """
    import time
    import kgtk.cli.zconcat as zcat

    time.sleep(int(naptime))

    source = str(KGTKArgumentParser.get_input_file(input_file))
    sink = str(KGTKArgumentParser.get_output_file(output_file))
    if sink == "-":
        sink = None

    try:
        # CSV mode needs an explicit request and must not be overridden by --tsv:
        wants_csv = not tsv and (csv or _dt == 'csv')
        colsep = ',' if wants_csv else '\t'

        sort_options = extra
        if reverse:
            sort_options += ' -r'
        if space:
            sort_options += ' ' + space_config
        elif speed:
            sort_options += ' ' + speed_config

        commands = build_command(input=source, output=sink, columns=columns, colsep=colsep, options=sort_options)
        outcome = zcat.run_sh_commands(commands)
        return outcome.exit_code
    except sh.SignalException_SIGPIPE:
        # hack to work around Python3 issue when stdout is gone when we try to report an exception;
        # without this we get an ugly 'Exception ignored...' msg when we quit with head or a pager:
        sys.stdout = os.fdopen(1)
    except Exception as err:
        details = type(err).__module__ + '.' + str(err)
        raise KGTKException('INTERNAL ERROR: ' + details + '\n')
def get_input(self, key):
    """Return the canonical input for 'key' so it can be used with 'get_query'.

    'key' is looked up with 'get_input_info'.  The input's alias is
    returned if it has one, otherwise its file.  Raises KGTKException if
    no input is registered under 'key'.
    """
    info = self.get_input_info(key)
    if info is None:
        raise KGTKException('no input named by key: %s' % key)
    alias = info.get('alias')
    return info.get('file') if alias is None else alias
def get_result_header(self, error=True):
    """Return the list of column names recorded for this query.

    The header only exists once the query has run at least once (and again
    after caches were cleared).  If it is missing, raise KGTKException
    when 'error' is true and return None otherwise.
    """
    self.refresh()
    header = self.kgtk_query.result_header
    if header is not None or not error:
        return header
    raise KGTKException('query needs to be run at least once to access its result header')
def _get_query(self, query, error=True):
    """Internal accessor that resolves 'query' given as an object or a name.

    A KypherQuery instance is returned unchanged; anything else is looked
    up in 'self.cached_queries'.  If nothing is found, raise KGTKException
    when 'error' is true and return None otherwise.
    """
    if isinstance(query, KypherQuery):
        return query
    cached = self.cached_queries.get(query)
    if cached is not None:
        return cached
    if error:
        raise KGTKException('cannot find query with name: %s' % query)
    return None
def wait_for_key_spec():
    """Poll the sort key spec file until its content ends with a tab.

    The file named by sort_env['KGTK_SORT_KEY_SPEC'] is re-read up to 100
    times with a 0.05 second pause after each unsuccessful read (about 5
    seconds in total).  Raises KGTKException if the marker never shows up.
    """
    import time
    attempt = 0
    while attempt < 100:
        with open(sort_env['KGTK_SORT_KEY_SPEC'], 'r') as inp:
            content = inp.read()
        if content.endswith('\t'):
            return
        time.sleep(0.05)
        attempt += 1
    raise KGTKException('INTERNAL ERROR: failed to communicate sort key')
def __call__(self, parser, namespace, values, option_string=None):
    """Record this option's value under the most recently listed input file.

    Values are collected in 'namespace.input_file_options', a dictionary
    keyed by input file (as a string) whose values map 'self.dest' to the
    option value.  Raises KGTKException if no input file has been seen yet.
    """
    per_input_options = getattr(namespace, 'input_file_options', {}) or {}
    known_inputs = KGTKArgumentParser.get_input_file_list(getattr(namespace, 'input_files', []))
    if len(known_inputs) < 1:
        raise KGTKException('out-of-place input option: %s' % option_string)

    # the option applies to the latest input; normalize path objects to strings:
    latest_input = str(known_inputs[-1])

    # boolean options carry no value of their own (also requires nargs=0):
    if self.type == bool:
        values = True

    # self.dest is the key for this particular option:
    options_for_input = per_input_options.setdefault(latest_input, {})
    options_for_input[self.dest] = values
    setattr(namespace, 'input_file_options', per_input_options)
def generate_prop_declaration_triple(self, node1: str, node2: str) -> bool:
    """Declare property ``node1`` with data type ``node2`` in the document.

    A ``WDProperty`` built from ``node1`` and the mapped data type
    ``self.datatype_mapping[node2]`` is added as a subject to
    ``self.doc.kg``.  Always returns True when it does not raise.

    Raises:
        KGTKException: if ``node1`` is already in ``self.prop_types`` and
            ``self.prop_declaration`` is false.
    """
    # update the known prop_types
    if node1 in self.prop_types:
        # A repeated declaration is only tolerated in prop_declaration mode.
        if not self.prop_declaration:
            raise KGTKException(
                "Duplicated property definition of {} found!".format(
                    node1))
    # NOTE(review): the source formatting was lost; this `else` is assumed to
    # pair with the outer `if` (record newly seen properties) - confirm.
    else:
        self.prop_types[node1] = node2
    prop = WDProperty(node1, self.datatype_mapping[node2])
    self.doc.kg.add_subject(prop)
    return True
def run(
        input_file: KGTKFiles,
        output_file: KGTKFiles,
        properties: str = '',
        undirected: bool = False,
        strong: bool = False,

        # The following have been modified to postpone importing graph_tool.
        # ConnectedComponents can't be referenced here.
        cluster_name_method: typing.Optional[typing.Any] = None,
        cluster_name_separator: typing.Optional[str] = None,
        cluster_name_prefix: typing.Optional[str] = None,
        cluster_name_zfill: typing.Optional[int] = None,
        minimum_cluster_size: typing.Optional[int] = None,

        **kwargs  # Whatever KgtkFileOptions and KgtkValueOptions want.
) -> int:
    """Run ``ConnectedComponents`` with cluster-naming options; return 0.

    Each cluster option left as ``None`` is replaced by the matching
    ``ConnectedComponents.DEFAULT_*`` constant, which can only be looked up
    after the local import below.  The processor is constructed outside the
    ``try`` block, so only failures of ``process()`` are wrapped.

    Raises:
        KGTKException: wrapping any exception raised by ``process()``.
    """
    from pathlib import Path

    from kgtk.exceptions import KGTKException
    from kgtk.gt.connected_components import ConnectedComponents
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions

    source_path: Path = KGTKArgumentParser.get_input_file(input_file)
    target_path: Path = KGTKArgumentParser.get_output_file(output_file)

    # It's OK to mention ConnectedComponents from here on.
    naming_method: ConnectedComponents.Method
    if cluster_name_method is not None:
        naming_method = cluster_name_method
    else:
        naming_method = ConnectedComponents.DEFAULT_CLUSTER_NAME_METHOD
    if cluster_name_separator is None:
        cluster_name_separator = ConnectedComponents.DEFAULT_CLUSTER_NAME_SEPARATOR
    if cluster_name_prefix is None:
        cluster_name_prefix = ConnectedComponents.DEFAULT_CLUSTER_NAME_PREFIX
    if cluster_name_zfill is None:
        cluster_name_zfill = ConnectedComponents.DEFAULT_CLUSTER_NAME_ZFILL
    if minimum_cluster_size is None:
        minimum_cluster_size = ConnectedComponents.DEFAULT_MINIMUM_CLUSTER_SIZE

    components: ConnectedComponents = ConnectedComponents(
        input_file_path=source_path,
        output_file_path=target_path,
        properties=properties,
        undirected=undirected,
        strong=strong,
        cluster_name_method=naming_method,
        cluster_name_separator=cluster_name_separator,
        cluster_name_prefix=cluster_name_prefix,
        cluster_name_zfill=cluster_name_zfill,
        minimum_cluster_size=minimum_cluster_size,
    )

    try:
        components.process()
    except Exception as err:
        raise KGTKException(str(err))
    return 0
def set_properties(self, prop_file: str):
    """Load the property -> data type table from a tab-separated file.

    ``self.prop_types`` is reset to an empty dict first.  The special file
    name ``"NONE"`` means "no property file" and leaves it empty.  Otherwise
    the first line is skipped as a header and every following non-blank line
    must have exactly three tab-separated fields; the first is the property
    id and the third names a data type that is translated through
    ``self.datatype_mapping``.

    Raises:
        KGTKException: if a data type is not a key of
            ``self.datatype_mapping``.
        ValueError: if a non-blank line does not have exactly three fields.
    """
    self.prop_types = {}
    if prop_file == "NONE":
        return

    with open(prop_file, "r") as fp:
        next(fp, None)  # skip the header row (tolerates an empty file)
        for line in fp:
            if not line.strip():
                # e.g. a trailing blank line; nothing to unpack
                continue
            node1, _, node2 = line.split("\t")
            datatype = node2.strip()
            try:
                self.prop_types[node1] = self.datatype_mapping[datatype]
            except KeyError as err:
                # Report the stripped name so the message is not broken up
                # by the line terminator that node2 still carries.
                raise KGTKException(
                    "DataType {} of node {} is not supported.\n".format(
                        datatype, node1)) from err
def run(input_files: KGTKFiles, output=None, gz=False, bz2=False, xz=False, _debug=False):
    """Run zconcat according to the provided command-line arguments.

    Returns the exit code of the executed shell commands, or None when the
    pipeline was cut short by SIGPIPE.  Any other failure is re-raised as a
    KGTKException.
    """
    # TO DO: figure out how to properly access shared --debug option
    try:
        resolved_inputs = KGTKArgumentParser.get_input_file_list(input_files)
        inputs: typing.List[str] = []
        for resolved in resolved_inputs:
            inputs.append(str(resolved))
        commands = build_command(inputs=inputs, output=output, gz=gz, bz2=bz2, xz=xz)
        outcome = run_sh_commands(commands, debug=_debug)
        return outcome.exit_code
    except sh.SignalException_SIGPIPE:
        # cleanup in case we piped and terminated prematurely:
        sys.stdout.flush()
    except Exception as err:
        raise KGTKException('INTERNAL ERROR: ' + str(err) + '\n')
def __init__(self, api, **kwargs):
    """Create a query object linked to the KypherApi 'api'.

    All other arguments are remembered as 'definition_args' and passed on
    to 'KypherQuery._define' (which see).  Raises KGTKException if 'api'
    is not a KypherApi instance.
    """
    if not isinstance(api, KypherApi):
        raise KGTKException('query needs to be linked to existing API object')
    self.api = api
    # these start out unset until the query gets defined:
    self.kgtk_query = self.sql = self.parameters = self.exec_wrapper = None
    self.definition_args = kwargs
    self.timestamp = -1
    self._define(**kwargs)
def process_presorted_input(self, kr: KgtkReader, kw: KgtkWriter):
    """Process input rows grouped by node1; the input must be sorted by node1.

    Rows are fed to ``process_row`` with a per-node attribute collection
    obtained from ``new_each_node_attributes``.  Whenever node1 changes to a
    larger value, the finished node is flushed through ``process_qnode``
    and a fresh collection is started; the last node is flushed after the
    loop.

    Raises:
        KGTKException: if a row's node1 is smaller than the preceding one.
    """
    if self.verbose:
        print("Processing presorted input.", file=self.error_file, flush=True)

    rows_seen: int = 0
    attributes: Lexicalize.EACH_NODE_ATTRIBUTES = self.new_each_node_attributes()
    group_node_id: typing.Optional[str] = None
    node_id: typing.Optional[str] = None

    for row_index, row in enumerate(kr):
        rows_seen += 1
        node_id = row[kr.node1_column_idx]
        node_property: str = row[kr.label_column_idx]
        node_value: str = row[kr.node2_column_idx]

        # Enforce the sort order (node1 lowest to highest) and detect the
        # start of a new group.
        starts_new_group = False
        if group_node_id is None:
            starts_new_group = True
        elif group_node_id > node_id:
            raise KGTKException("Row %d is out of order: %s > %s" % (row_index + 1, group_node_id, node_id))
        elif group_node_id < node_id:
            # The previous node is complete: emit it first.
            self.process_qnode(kw, group_node_id, attributes)
            starts_new_group = True

        if starts_new_group:
            attributes = self.new_each_node_attributes()
            group_node_id = node_id

        self.process_row(node_id, node_property, node_value, attributes)

    if node_id is not None:
        # Emit the final node of the input file.
        self.process_qnode(kw, node_id, attributes)

    if self.verbose:
        print("Processed %d input rows." % (rows_seen), file=self.error_file, flush=True)
def run(name, info, error):
    """
    Example command body: print 'name', 'info' and the local host name.

    Arguments here should be defined in `add_arguments` first.
    The return value (integer) will be the return code in shell. It will set to 0 if no value returns.
    Though you can return a non-zero value to indicate error, raising exceptions defined in kgtk.exceptions
    is preferred since this gives the user a unified error code and message.

    A truthy 'error' makes the command raise KGTKException before printing anything.
    """
    # import modules locally
    import socket
    from kgtk.exceptions import KGTKException

    if error:
        raise KGTKException('An error here\n')
    hostname = socket.gethostname()
    report = 'name: {}, info: {}\nhost: {}'.format(name, info, hostname)
    print(report)
def run(**kwargs):
    """Build the KGTK search input by delegating to ``ElasticsearchManager``.

    All settings are read from ``kwargs``; the required keys are
    ``input_file_path``, ``label_properties``, ``mapping_file_path``,
    ``output_file_path``, ``alias_properties``, ``extra_alias_properties``,
    ``pagerank_properties``, ``description_properties``, ``add_text`` and
    ``property_datatype_file``.

    Raises:
        KGTKException: wrapping ``SystemExit`` ("Exit requested") or any
            other exception, including a missing key; for the latter the
            message carries the command name and the formatted traceback.
    """
    from kgtk.utils.elasticsearch_manager import ElasticsearchManager

    try:
        ElasticsearchManager.build_kgtk_search_input(kwargs['input_file_path'], kwargs['label_properties'],
                                                     kwargs['mapping_file_path'], kwargs['output_file_path'],
                                                     alias_fields=kwargs['alias_properties'],
                                                     extra_alias_properties=kwargs['extra_alias_properties'],
                                                     pagerank_fields=kwargs['pagerank_properties'],
                                                     description_properties=kwargs['description_properties'],
                                                     add_text=kwargs['add_text'],
                                                     property_datatype_file=kwargs['property_datatype_file']
                                                     )
    except SystemExit:
        # Same convention as the other command entry points.
        raise KGTKException("Exit requested")
    except Exception:
        # Deliberately not a bare `except`: KeyboardInterrupt must still be
        # able to stop the command instead of being reported as an error.
        message = 'Command: build-kgtk-search-input\n'
        message += 'Error Message:  {}\n'.format(traceback.format_exc())
        raise KGTKException(message)