Example #1
        @classmethod
        def process(cls, named_seqs: Optional[List[List[str]]]):
            if named_seqs is None:
                return 0, None

            seqs: List[List[BaseGenerator]] = [
                list(map(lambda x: cls.generators[x], s)) for s in named_seqs
            ]
            max_seq_trials = cls.args.max_seq_trials
            results: List[Dict] = []

            for idx, seq in enumerate(seqs):
                engine = RandProgEngine(seq, cls.args)
                for trial in range(max_seq_trials):
                    try:
                        spec: ExplorationSpec = engine.generate()
                    except Exception as e:
                        if cls.args.debug:
                            logger.warn("Encountered exception for",
                                        named_seqs[idx])
                            logger.log(e)
                            logging.exception(e)

                        continue

                    if spec is None:
                        continue

                    dpoint = {
                        'inputs': spec.inputs,
                        'output': spec.output,
                        'intermediates': spec.intermediates,
                        'program_str': str(spec.program),
                        'program': spec.program,
                        'function_sequence': named_seqs[idx],
                        'generator_tracking': spec.tracking
                    }

                    # print("-" * 50)
                    # print(dpoint)
                    # print("-" * 50)
                    # print([t.record for t in spec.tracking])
                    # print(spec.program)

                    #  Confirm the data point is picklable. Unpickling sometimes
                    #  throws an error when the main process receives the message,
                    #  and things break down in a very nasty manner.
                    #  TODO : Can we switch to dill while using multiprocessing/pebble?
                    try:
                        pickle.loads(pickle.dumps(dpoint))
                    except Exception:
                        continue

                    results.append(dpoint)
                    break

            return len(named_seqs), results
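
The pickling round trip above also hints at the open dill TODO. For comparison, a dill-based check could look like the sketch below; this is a minimal sketch assuming the third-party dill package is installed, and the helper name is_serializable is hypothetical. Note that swapping the check alone does not make pebble's inter-process transport use dill, since multiprocessing still pickles messages internally.

import dill  # third-party; pip install dill


def is_serializable(obj) -> bool:
    #  True iff obj survives a full serialize/deserialize round trip
    try:
        dill.loads(dill.dumps(obj))
        return True
    except Exception:
        return False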
Example #2
    def generate(self):
        self.init()
        num_generated = 0
        num_processed = 0
        num_raw_points = -1
        if os.path.exists(self.args.raw_data_path + '.index'):
            reader = IndexedFileReader(self.args.raw_data_path)
            num_raw_points = len(reader)
            reader.close()

        start_time = time.time()
        with pebble.ProcessPool(
                max_workers=self.args.processes,
                initializer=FunctionSeqDataGenerator.Worker.init,
                initargs=(self.args, )) as p:

            chunksize = self.args.processes * self.args.chunksize
            for chunk in misc.grouper(chunksize, self.raw_data_iterator()):
                future = p.map(FunctionSeqDataGenerator.Worker.process,
                               chunk,
                               timeout=self.args.task_timeout)
                res_iter = future.result()

                idx = -1
                while True:
                    idx += 1
                    if idx < len(chunk) and chunk[idx] is not None:
                        num_processed += 1

                    try:
                        result = next(res_iter)
                        if chunk[idx] is None:
                            continue

                        if result is not None:
                            self.process_result(result)
                            num_generated += 1

                    except StopIteration:
                        break

                    except TimeoutError:
                        #  pebble raises concurrent.futures.TimeoutError when a
                        #  task exceeds task_timeout; skip it and keep draining.
                        pass

                    except Exception as e:
                        try:
                            logger.warn("Failed for", chunk[idx])
                            logging.exception(e)

                        except Exception:
                            pass

                    finally:

                        speed = round(
                            num_processed / (time.time() - start_time), 1)
                        if num_raw_points != -1 and speed > 0:
                            time_remaining = round(
                                (num_raw_points - num_processed) / speed, 1)
                        else:
                            time_remaining = '???'

                        logger.log(
                            "Generated/Processed : {}/{} ({}/s, TTC={}s)".
                            format(num_generated, num_processed, speed,
                                   time_remaining),
                            end='\r')

            p.stop()
            try:
                p.join(10)
            except Exception:
                pass

        self.fwriter.close()

        logger.log("\n-------------------------------------------------")
        logger.info("Total Time : {:.2f}s".format(time.time() - start_time))
        logger.info(
            "Generated {} training points from {} raw data points".format(
                num_generated, num_processed))
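
Both generate() methods lean on the same pebble idiom: submit a chunk with p.map(..., timeout=...), then drain the result iterator, where a timed-out task surfaces as a TimeoutError on its own next() call instead of aborting the rest of the chunk. A minimal, self-contained sketch of that idiom, with slow_square and the timings invented purely for illustration:

import time
from concurrent.futures import TimeoutError

import pebble


def slow_square(x):
    time.sleep(x)  # simulate work proportional to the input
    return x * x


if __name__ == '__main__':
    with pebble.ProcessPool(max_workers=2) as pool:
        future = pool.map(slow_square, [0.1, 5, 0.2], timeout=1)
        res_iter = future.result()
        while True:
            try:
                print(next(res_iter))  # results arrive in submission order
            except StopIteration:
                break  # every task has been accounted for
            except TimeoutError:
                print('task timed out')  # the 5s task exceeds the 1s limit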
Example #3
    def generate(self):
        self.init()
        num_generated = 0
        num_processed = 0
        num_required = self.args.num_training_points
        self.sequences = self.load_sequences()
        start_time = time.time()
        speed = 0
        time_remaining = 'inf'

        with pebble.ProcessPool(max_workers=self.args.processes,
                                initializer=RawDataGenerator.Worker.init,
                                initargs=(self.args, )) as p:

            #  Start with a smaller chunk size so the blacklist can take effect early
            chunksize = self.args.processes * self.args.chunksize

            if self.args.blacklist_threshold == -1:
                chunksize_blacklist = chunksize
            else:
                chunksize_blacklist = max(
                    (self.args.blacklist_threshold //
                     self.args.max_seq_trials), 1) * len(self.sequences)

            for chunk in misc.grouper([chunksize_blacklist, chunksize],
                                      self.gen_named_seqs()):
                if not p.active:
                    break

                future = p.map(RawDataGenerator.Worker.process,
                               chunk,
                               timeout=self.args.task_timeout)
                res_iter = future.result()

                idx = -1
                while True:
                    idx += 1
                    if num_generated >= num_required:
                        p.stop()
                        try:
                            p.join(10)
                        except Exception:
                            pass
                        break

                    try:
                        returned = next(res_iter)
                        if returned is None:
                            self.report_error_seqs(chunk[idx])
                            continue

                        num_input_seqs, results = returned
                        num_processed += num_input_seqs
                        if results is not None and len(results) > 0:
                            for seq in chunk[idx]:
                                self.whitelist.add(tuple(seq))

                            for result in results:
                                num_generated += 1
                                self.process_dpoint(result)

                            speed = round(
                                num_generated / (time.time() - start_time), 1)
                            if speed > 0:
                                time_remaining = round(
                                    (num_required - num_generated) / speed, 1)

                        elif num_input_seqs > 0:
                            self.report_error_seqs(chunk[idx])

                        logger.log("Num Generated : {} ({}/s, TTC={}s)".format(
                            num_generated, speed, time_remaining),
                                   end='\r')

                    except StopIteration:
                        break

                    except TimeoutError:
                        #  Task exceeded task_timeout; skip it and keep draining.
                        pass

                    except Exception:
                        logger.warn("Failed for", chunk[idx])

            p.stop()
            try:
                p.join(10)
            except Exception:
                pass

        self.fwriter.close()
        logger.log("\n-------------------------------------------------")
        logger.info("Total Time : {:.2f}s".format(time.time() - start_time))
        logger.info("Number of sequences processed :", num_processed)
        logger.info("Number of training points generated :", num_generated)