def __init__(self, *args, **kwargs):
    """Initialize the reader and register its extra working directories.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer. Afterwards, `get_dir` is called once per extra
    sub-directory of `self.tmp_dir` and each result is stored on the
    instance as `nxml_dir`, `isi_temp_dir` and `output_dir`.
    """
    super(IsiReader, self).__init__(*args, **kwargs)

    # Extra directories, as (attribute name, folder name under tmp_dir).
    # NOTE(review): tmp_dir is presumably set by the parent initializer.
    extra_dirs = (
        ('nxml_dir', 'nxmls'),
        ('isi_temp_dir', 'temp'),
        ('output_dir', 'output'),
    )
    for attr_name, folder in extra_dirs:
        setattr(self, attr_name, get_dir(self.tmp_dir, folder))
def __init__(self, *args, **kwargs):
    """Initialize the reader and write out its full configuration file.

    The pair returned by `_check_reach_env` is stored as `exec_path` and
    `version` before the parent initializer runs with the forwarded
    arguments. The configuration written to `<tmp_dir>/indra.conf` is the
    text of `kb_conf.txt` followed by the text of `reach_conf_fmt.txt`
    with its `tmp_dir`, `num_cores` and `loglevel` fields filled in; both
    source files are looked up next to this module.
    """
    reach_env = self._check_reach_env()
    self.exec_path, self.version = reach_env
    super(ReachReader, self).__init__(*args, **kwargs)

    module_dir = path.dirname(__file__)
    # Template for the main application configuration.
    main_template_path = path.join(module_dir, 'reach_conf_fmt.txt')
    # KB configuration; it has to come first in the final file for the
    # configuration to be complete and working.
    kb_path = path.join(module_dir, 'kb_conf.txt')

    with open(kb_path, 'r') as kb_file:
        kb_section = kb_file.read()

    self.conf_file_path = path.join(self.tmp_dir, 'indra.conf')
    with open(main_template_path, 'r') as template_file:
        template = template_file.read()

    # The log level is fixed at INFO, regardless of the logger's own level.
    main_section = template.format(
        tmp_dir=self.tmp_dir,
        num_cores=self.n_proc,
        loglevel='INFO',
    )

    # KB section first, then the formatted main section.
    with open(self.conf_file_path, 'w') as conf_file:
        conf_file.write(kb_section + main_section)

    self.output_dir = get_dir(self.tmp_dir, 'output')
    self.num_input = 0
def __init__(self, *args, **kwargs):
    """Initialize the reader and write out its configuration file.

    The pair returned by `_check_reach_env` is stored as `exec_path` and
    `version` before the parent initializer runs with the forwarded
    arguments. The text of `reach_conf_fmt.txt` (located next to this
    module) is then formatted with `tmp_dir`, `num_cores` and `loglevel`
    and written to `<tmp_dir>/indra.conf`.
    """
    reach_env = self._check_reach_env()
    self.exec_path, self.version = reach_env
    super(ReachReader, self).__init__(*args, **kwargs)

    template_path = path.join(path.dirname(__file__), 'reach_conf_fmt.txt')
    self.conf_file_path = path.join(self.tmp_dir, 'indra.conf')

    with open(template_path, 'r') as template_file:
        template = template_file.read()

    # The log level is fixed at INFO, regardless of the logger's own level.
    rendered_conf = template.format(
        tmp_dir=self.tmp_dir,
        num_cores=self.n_proc,
        loglevel='INFO',
    )
    with open(self.conf_file_path, 'w') as conf_file:
        conf_file.write(rendered_conf)

    self.output_dir = get_dir(self.tmp_dir, 'output')
    self.num_input = 0
def main():
    """Run reading over the ids listed in the input file, in outer batches.

    Command-line arguments come from `make_parser`. The input file is read
    line by line (one integer id per line); the lines are either sampled
    (`n_samp`) or shuffled, optionally restricted to a `start:end` range,
    and then processed in outer batches of `b_out` ids. Each batch is
    passed to `run_reading` together with the constructed readers.

    Raises
    ------
    AssertionError
        If the selected reading mode and result mode form a forbidden
        combination.
    """
    # Process the arguments. =================================================
    parser = make_parser()
    args = parser.parse_args()
    if args.debug and not args.quiet:
        logger.setLevel(logging.DEBUG)

    # Get the ids.
    with open(args.input_file, 'r') as f:
        input_lines = f.readlines()
    logger.info("Found %d ids.", len(input_lines))

    # Select only a sample of the lines, if sample is chosen.
    if args.n_samp is not None:
        input_lines = random.sample(input_lines, args.n_samp)
    else:
        random.shuffle(input_lines)

    # If a range is specified, only use that range.
    if args.range_str is not None:
        start_idx, end_idx = [int(n) for n in args.range_str.split(':')]
        input_lines = input_lines[start_idx:end_idx]

    # Get the outer batch.
    B = args.b_out
    n_max = int(ceil(float(len(input_lines))/B))

    # Create a single base directory
    base_dir = get_dir(args.temp, 'run_%s' % ('_and_'.join(args.readers)))

    # Get the readers objects. Each optional limit is forwarded under its
    # own keyword: the space ratio as 'max_space_ratio' and the input
    # length as 'input_character_limit' (these used to be crossed over).
    kwargs = {'base_dir': base_dir, 'n_proc': args.n_proc}
    if args.max_reach_space_ratio is not None:
        kwargs['max_space_ratio'] = args.max_reach_space_ratio
    if args.max_reach_input_len is not None:
        kwargs['input_character_limit'] = args.max_reach_input_len
    readers = construct_readers(args.readers, **kwargs)

    # Set the verbosity. The quiet argument overrides the verbose argument.
    verbose = args.verbose and not args.quiet

    # Some combinations of options don't make sense:
    forbidden_combos = [('all', 'unread'), ('none', 'unread'), ('none', 'none')]
    assert (args.reading_mode, args.rslt_mode) not in forbidden_combos, \
        ("The combination of reading mode %s and statement mode %s is not "
         "allowed." % (args.reading_mode, args.rslt_mode))

    for n in range(n_max):
        logger.info("Beginning outer batch %d/%d. ------------", n+1, n_max)

        # Get the pickle file names.
        if args.name is not None:
            reading_pickle = args.name + '_readings_%d.pkl' % n
            rslts_pickle = args.name + '_results_%d.pkl' % n
        else:
            reading_pickle = None
            rslts_pickle = None

        # Get the list of ids for this outer batch.
        tcids = [int(tcid_str.strip())
                 for tcid_str in input_lines[B*n:B*(n+1)]]

        # Read everything ====================================================
        run_reading(readers, tcids, verbose, args.reading_mode,
                    args.rslt_mode, args.b_in, reading_pickle, rslts_pickle,
                    not args.no_reading_upload, not args.no_result_upload)
def __init__(self, *args, **kwargs):
    """Initialize the reader with an input counter and I/O directories.

    All arguments are forwarded unchanged to the parent initializer.
    `num_input` then starts at zero, and `input_dir` and `output_dir` are
    set to the results of `get_dir` for the 'input' and 'output' folders
    under `self.tmp_dir`.
    """
    super().__init__(*args, **kwargs)
    self.num_input = 0

    # (attribute name, folder name under tmp_dir), in creation order.
    for attr_name, folder in (('input_dir', 'input'),
                              ('output_dir', 'output')):
        setattr(self, attr_name, get_dir(self.tmp_dir, folder))