Ejemplo n.º 1
0
    def __init__(self, *args, **kwargs):
        """Initialize the reader and set up its extra working directories.

        All positional and keyword arguments are forwarded unchanged to the
        parent initializer. Afterwards three directories are obtained from
        `get_dir` under `self.tmp_dir` and stored as `nxml_dir`,
        `isi_temp_dir` and `output_dir`.
        """
        super(IsiReader, self).__init__(*args, **kwargs)

        # Attribute name -> sub-directory name under the temporary directory.
        # `self.tmp_dir` is presumably set by the parent initializer.
        extra_dirs = (
            ('nxml_dir', 'nxmls'),
            ('isi_temp_dir', 'temp'),
            ('output_dir', 'output'),
        )
        for attr_name, subdir in extra_dirs:
            setattr(self, attr_name, get_dir(self.tmp_dir, subdir))
Ejemplo n.º 2
0
    def __init__(self, *args, **kwargs):
        """Initialize the reader and write out its full configuration file.

        The configuration written to `<tmp_dir>/indra.conf` is the content of
        `kb_conf.txt` followed by the content of `reach_conf_fmt.txt` after
        the latter has been formatted with the temporary directory, the
        number of processes and the log level. Both source files are looked
        up next to this module.

        All positional and keyword arguments are forwarded unchanged to the
        parent initializer.
        """
        # The environment check runs before the parent initializer.
        self.exec_path, self.version = self._check_reach_env()
        super(ReachReader, self).__init__(*args, **kwargs)

        # The KB configuration has to be prepended to the main application
        # configuration to make a full working configuration file.
        kb_conf_fname = path.join(path.dirname(__file__), 'kb_conf.txt')
        with open(kb_conf_fname, 'r') as kb_file:
            kb_section = kb_file.read()

        self.conf_file_path = path.join(self.tmp_dir, 'indra.conf')

        # Load the main configuration template and fill in our settings.
        template_fname = path.join(path.dirname(__file__),
                                   'reach_conf_fmt.txt')
        with open(template_fname, 'r') as template_file:
            template = template_file.read()
        # The log level is currently fixed and does not follow the logger.
        log_level = 'INFO'
        main_section = template.format(tmp_dir=self.tmp_dir,
                                       num_cores=self.n_proc,
                                       loglevel=log_level)

        # KB section first, then the formatted main section.
        with open(self.conf_file_path, 'w') as conf_file:
            conf_file.write(kb_section + main_section)

        self.output_dir = get_dir(self.tmp_dir, 'output')
        self.num_input = 0
Ejemplo n.º 3
0
 def __init__(self, *args, **kwargs):
     """Initialize the reader and write out its configuration file.

     The template `reach_conf_fmt.txt`, located next to this module, is
     formatted with the temporary directory, the number of processes and
     the log level, and the result is written to `<tmp_dir>/indra.conf`.

     All positional and keyword arguments are forwarded unchanged to the
     parent initializer.
     """
     # The environment check runs before the parent initializer.
     self.exec_path, self.version = self._check_reach_env()
     super(ReachReader, self).__init__(*args, **kwargs)

     template_fname = path.join(path.dirname(__file__),
                                'reach_conf_fmt.txt')
     self.conf_file_path = path.join(self.tmp_dir, 'indra.conf')

     with open(template_fname, 'r') as template_file:
         template = template_file.read()
     # The log level is currently fixed and does not follow the logger.
     log_level = 'INFO'
     rendered_conf = template.format(tmp_dir=self.tmp_dir,
                                     num_cores=self.n_proc,
                                     loglevel=log_level)
     with open(self.conf_file_path, 'w') as conf_file:
         conf_file.write(rendered_conf)

     self.output_dir = get_dir(self.tmp_dir, 'output')
     self.num_input = 0
Ejemplo n.º 4
0
def main():
    """Run the readers over the ids listed in the input file, in batches.

    The command-line arguments are parsed with the parser returned by
    `make_parser`. The input file is expected to hold one integer id per
    line. The lines are either randomly sampled (when `n_samp` is given) or
    shuffled, optionally restricted to a `start:end` slice, and then passed
    to `run_reading` in outer batches of `b_out` ids.

    Raises
    ------
    AssertionError
        If the combination of reading mode and result mode is not allowed.
    ValueError
        If the range string does not split into exactly two integers, or if
        a line of the input file cannot be converted to an integer.
    """
    # Process the arguments. =================================================
    parser = make_parser()
    args = parser.parse_args()
    if args.debug and not args.quiet:
        logger.setLevel(logging.DEBUG)

    # Get the ids.
    with open(args.input_file, 'r') as f:
        input_lines = f.readlines()
    logger.info("Found %d ids.", len(input_lines))

    # Select only a sample of the lines, if sample is chosen.
    if args.n_samp is not None:
        input_lines = random.sample(input_lines, args.n_samp)
    else:
        random.shuffle(input_lines)

    # If a range is specified, only use that range.
    if args.range_str is not None:
        start_idx, end_idx = [int(n) for n in args.range_str.split(':')]
        input_lines = input_lines[start_idx:end_idx]

    # Get the outer batch size and the number of outer batches.
    B = args.b_out
    n_max = int(ceil(float(len(input_lines))/B))

    # Create a single base directory
    base_dir = get_dir(args.temp, 'run_%s' % ('_and_'.join(args.readers)))

    # Get the readers objects. Each optional limit is passed under the
    # keyword that matches its meaning: the space ratio goes to
    # `max_space_ratio` and the input length to `input_character_limit`
    # (these two were previously crossed).
    kwargs = {'base_dir': base_dir, 'n_proc': args.n_proc}
    if args.max_reach_space_ratio is not None:
        kwargs['max_space_ratio'] = args.max_reach_space_ratio
    if args.max_reach_input_len is not None:
        kwargs['input_character_limit'] = args.max_reach_input_len
    readers = construct_readers(args.readers, **kwargs)

    # Set the verbosity. The quiet argument overrides the verbose argument.
    verbose = args.verbose and not args.quiet

    # Some combinations of options don't make sense:
    forbidden_combos = [('all', 'unread'), ('none', 'unread'),
                        ('none', 'none')]
    assert (args.reading_mode, args.rslt_mode) not in forbidden_combos, \
        ("The combination of reading mode %s and statement mode %s is not "
         "allowed." % (args.reading_mode, args.rslt_mode))

    for n in range(n_max):
        logger.info("Beginning outer batch %d/%d. ------------", n+1, n_max)

        # Get the pickle file names; no pickles are named without a run name.
        if args.name is not None:
            reading_pickle = args.name + '_readings_%d.pkl' % n
            rslts_pickle = args.name + '_results_%d.pkl' % n
        else:
            reading_pickle = None
            rslts_pickle = None

        # Get the list of ids belonging to this outer batch.
        tcids = [int(tcid_str.strip())
                 for tcid_str in input_lines[B*n:B*(n+1)]]

        # Read everything ====================================================
        run_reading(readers, tcids, verbose, args.reading_mode, args.rslt_mode,
                    args.b_in, reading_pickle, rslts_pickle,
                    not args.no_reading_upload, not args.no_result_upload)
Ejemplo n.º 5
0
 def __init__(self, *args, **kwargs):
     """Initialize the reader with an input counter and I/O directories.

     All positional and keyword arguments are forwarded unchanged to the
     parent initializer. The input counter starts at zero, and the
     `input_dir` and `output_dir` attributes are obtained from `get_dir`
     under `self.tmp_dir`.
     """
     super().__init__(*args, **kwargs)
     self.num_input = 0

     # Input first, then output; `self.tmp_dir` is presumably set by the
     # parent initializer.
     for kind in ('input', 'output'):
         setattr(self, kind + '_dir', get_dir(self.tmp_dir, kind))