Example #1
def zeroconf_fit_ensemble(y, atsklrn_tempdir):
    lo = utl.get_logger(inspect.stack()[0][3])

    lo.info("Building ensemble")

    seed = 1

    ensemble = AutoSklearnClassifier(
        time_left_for_this_task=300,
        per_run_time_limit=150,
        ml_memory_limit=20240,
        ensemble_size=50,
        ensemble_nbest=200,
        shared_mode=True,
        tmp_folder=atsklrn_tempdir,
        output_folder=atsklrn_tempdir,
        delete_tmp_folder_after_terminate=False,
        delete_output_folder_after_terminate=False,
        initial_configurations_via_metalearning=0,
        seed=seed)

    lo.info("Done AutoSklearnClassifier - seed:" + str(seed))

    try:
        lo.debug("Start ensemble.fit_ensemble - seed:" + str(seed))
        ensemble.fit_ensemble(task=BINARY_CLASSIFICATION,
                              y=y,
                              metric=autosklearn.metrics.f1,
                              precision='32',
                              dataset_name='foobar',
                              ensemble_size=10,
                              ensemble_nbest=15)
    except Exception:
        lo = utl.get_logger(inspect.stack()[0][3])
        lo.exception("Error in ensemble.fit_ensemble - seed:" + str(seed))
        raise

    lo = utl.get_logger(inspect.stack()[0][3])
    lo.debug("Done ensemble.fit_ensemble - seed:" + str(seed))

    sleep(20)
    lo.info("Ensemble built - seed:" + str(seed))

    lo.info("Show models - seed:" + str(seed))
    txtList = str(ensemble.show_models()).split("\n")
    for row in txtList:
        lo.info(row)

    return ensemble
Example #2
def spawn_autosklearn_classifier(X_train, y_train, seed, dataset_name,
                                 time_left_for_this_task, per_run_time_limit,
                                 feat_type, memory_limit, atsklrn_tempdir):
    lo = utl.get_logger(inspect.stack()[0][3])

    try:
        lo.info("Start AutoSklearnClassifier seed=" + str(seed))
        clf = AutoSklearnClassifier(
            time_left_for_this_task=time_left_for_this_task,
            per_run_time_limit=per_run_time_limit,
            ml_memory_limit=memory_limit,
            shared_mode=True,
            tmp_folder=atsklrn_tempdir,
            output_folder=atsklrn_tempdir,
            delete_tmp_folder_after_terminate=False,
            delete_output_folder_after_terminate=False,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
            seed=seed)
    except Exception:
        lo.exception("Exception AutoSklearnClassifier seed=" + str(seed))
        raise

    lo = utl.get_logger(inspect.stack()[0][3])
    lo.info("Done AutoSklearnClassifier seed=" + str(seed))

    sleep(seed)

    try:
        lo.info("Starting seed=" + str(seed))
        try:
            clf.fit(X_train,
                    y_train,
                    metric=autosklearn.metrics.f1,
                    feat_type=feat_type,
                    dataset_name=dataset_name)
        except Exception:
            lo = utl.get_logger(inspect.stack()[0][3])
            lo.exception("Error in clf.fit - seed:" + str(seed))
            raise
    except Exception:
        lo = utl.get_logger(inspect.stack()[0][3])
        lo.exception("Exception in seed=" + str(seed) + ".  ")
        traceback.print_exc()
        raise
    lo = utl.get_logger(inspect.stack()[0][3])
    lo.info("####### Finished seed=" + str(seed))
    return None
Example #3
def train_multicore(X,
                    y,
                    feat_type,
                    memory_limit,
                    atsklrn_tempdir,
                    pool_size=1,
                    per_run_time_limit=60):
    lo = utl.get_logger(inspect.stack()[0][3])

    time_left_for_this_task = calculate_time_left_for_this_task(
        pool_size, per_run_time_limit)

    lo.info("Max time allowance for a model " +
            str(math.ceil(per_run_time_limit / 60.0)) + " minute(s)")
    lo.info("Overal run time is about " +
            str(2 * math.ceil(time_left_for_this_task / 60.0)) + " minute(s)")

    processes = []
    for i in range(2,
                   pool_size + 2):  # reserve seed 1 for the ensemble building
        seed = i
        pr = multiprocessing.Process(target=spawn_autosklearn_classifier,
                                     args=(X, y, i, 'foobar',
                                           time_left_for_this_task,
                                           per_run_time_limit, feat_type,
                                           memory_limit, atsklrn_tempdir))
        pr.start()
        lo.info("Multicore process " + str(seed) + " started")
        processes.append(pr)
    for pr in processes:
        pr.join()

    lo.info("Multicore fit completed")
Example #4
    def _init_environment(self):
        # Folders for storage/retrieval
        self.main_directory = '../'
        self.checkpoint_dir = self.main_directory + 'checkpts/' + self.model_name + '/'
        self.tensorboard_dir = self.main_directory + 'tb_graphs/' + self.model_name + '/'
        self.solutions_dir = self.main_directory + 'solutions/' + self.model_name + '/'
        logging_directory = self.main_directory + 'logs/'
        for dir_name in [
                self.checkpoint_dir, self.tensorboard_dir, self.solutions_dir,
                logging_directory
        ]:
            if not os.path.exists(dir_name): os.makedirs(dir_name)

        self.num_classes = 10 if self.FLAGS.small else 345

        if self.FLAGS.gpu != -1:
            os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
            os.environ["CUDA_VISIBLE_DEVICES"] = str(
                self.FLAGS.gpu)  # export CUDA_VISIBLE_DEVICES=5

        if in_jupyter():
            get_ipython().system(
                'echo "GPU Device in use: \'$CUDA_VISIBLE_DEVICES\'"')  # pylint: disable=E0602
        else:
            os.system('echo "GPU Device in use: \'$CUDA_VISIBLE_DEVICES\'"')

        log_in_file = True
        if log_in_file:
            logger = get_logger(self.model_name, logging_directory)
            self.logging = logger.info
        else:
            self.logging = print
Example #5
def main():
    args = get_args()
    logger = get_logger(args.write_log)
    
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    device_ids_str = args.gpu.split(',')
    device_ids = []
    for i in range(len(device_ids_str)):
        device_ids.append(i)

    multi_gpu = False
    if args.mode != "prep":
        logger.info("Loading network")
        model = AdaMatting(in_channel=4)
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=0)
        if args.cuda:
            device = torch.device("cuda:{}".format(device_ids[0]))
            if len(device_ids) > 1 and args.mode=="train":
                logger.info("Loading with multiple GPUs")
                model = torch.nn.DataParallel(model, device_ids=device_ids)
                multi_gpu = True
            model = model.cuda(device=device_ids[0])
        else:
            device = torch.device("cpu")

    if args.mode == "train":
        logger.info("Program runs in train mode")
        train(model=model, optimizer=optimizer, device=device, args=args, logger=logger, multi_gpu=multi_gpu)
    elif args.mode == "test":
        logger.info("Program runs in test mode")
        test()
    elif args.mode == "prep":
        logger.info("Program runs in prep mode")
        # composite_dataset(args.raw_data_path, logger)
        gen_train_valid_names(args.valid_portion, logger)
Example #6
 def __init__(self, bot_jid, stream):
     self.bot_jid = bot_jid
     self._stream = stream
     self.cmd_handler = CommandHandler(message_bus = self)
     self.admin_cmd_handler = AdminCMDHandler(message_bus = self)
     self.logger = get_logger()
     self.offline_split_symbol = "$_$_$_$"
     return
Example #7
 def __init__(self, bot_jid, stream):
     self.bot_jid = bot_jid
     self._stream = stream
     self.cmd_handler = CommandHandler(message_bus=self)
     self.admin_cmd_handler = AdminCMDHandler(message_bus=self)
     self.logger = get_logger()
     self.offline_split_symbol = "$_$_$_$"
     return
Example #8
    def evaluate_runtime(self):
        log_name = datetime.now().strftime('result_runtime.log')
        summary_logger = util.get_logger(opt.result_dir, log_name)
        result = {}
        log = ''
        log += 'OutputNode\t'
        for lr in opt.dash_lr:
            log += '{}p\t'.format(lr)
            result[lr] = {}
        summary_logger.info(log)

        batch_num = self.opt.test_num_batch
        for lr in opt.dash_lr:
            self.dataset.setTargetLR(lr)
            self.model.setTargetScale(self.dataset.getTargetScale())
            for node in self.model.getOutputNodes():
                elapsed_times = []
                t_w = RESOLUTION[lr][0]
                t_h = RESOLUTION[lr][1]
                input = torch.FloatTensor(batch_num, 3, t_w,
                                          t_h).random_(0, 1).to(self.device)

                try:
                    for _ in range(DUMMY_TRIAL):
                        output = self.model(input, node)
                        torch.cuda.synchronize()

                    for _ in range(TEST_TRIAL):
                        start_time = time.perf_counter()
                        output = self.model(input, node)
                        torch.cuda.synchronize()
                        end_time = time.perf_counter()
                        elapsed_time = (end_time - start_time)
                        elapsed_times.append(elapsed_time)

                except Exception as e:
                    print(e)
                    sys.exit()

                average_elapsed_time = np.sum(elapsed_times) / (TEST_TRIAL *
                                                                batch_num)
                result[lr][node] = average_elapsed_time

                print(
                    '[Resolution: Size ({}x{}), OutputNode: {}] / Inference time per frame(sec) {} / Max-Min(sec) {}'
                    .format(
                        t_w, t_h, node, round(average_elapsed_time, 4),
                        round(
                            np.max(elapsed_times) - np.min(elapsed_times), 4)))

        for node in self.output_nodes:
            log = ''
            log += '{}\t'.format(node)
            for lr in self.node2res[node]:
                log += '{}\t'.format(round(result[lr][node], 4))
            summary_logger.info(log)
Example #9
 def __init__(self, context_name, log_level, log_stream, log_folder,
              database_module, db_configuration, **kwargs):
     self.logger = get_logger(context_name, log_level, log_stream,
                              log_folder)
     print_start(self.logger)
     self.db = database_module(self.logger, **db_configuration)
     self.logger.info("{} setup complete !!".format(context_name))
     if self.db is not None:
         self.push_todb = self.db.put
     else:
         raise ValueError("No database parameter given")
Example #10
 def __init__(self, log_level, log_stream, log_folder="logs"):
     """[Multi Processing class]
         Responsible for running the lambda functions passed in
         inside threads
         Arguments:
             log_level {[string]} -- Levels of log for each process
             log_stream {[string]} -- Stream of log for each process
             
     """
     self.logger = get_logger(__name__, log_level, log_stream, log_folder)
     self.process_list = []
Example #11
def define_pool_size(memory_limit):
    # some classifiers can use more than one core - so keep this at half memory and cores
    max_pool_size = int(
        math.ceil(psutil.virtual_memory().total / (memory_limit * 1000000)))
    half_of_cores = int(math.ceil(psutil.cpu_count() / 2.0))

    lo = utl.get_logger(inspect.stack()[0][3])
    lo.info("Virtual Memory Size = " + str(psutil.virtual_memory().total))
    lo.info("CPU Count =" + str(psutil.cpu_count()))
    lo.info("Max CPU Pool Size by Memory = " + str(max_pool_size))

    return half_of_cores if max_pool_size > half_of_cores else max_pool_size
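
A worked example of this sizing heuristic, assuming a hypothetical machine with 16 GB of RAM and 8 logical cores and a memory_limit of 2000 MB per model:

import math

total_memory = 16 * 1000 ** 3                                      # what psutil.virtual_memory().total would report
max_pool_size = int(math.ceil(total_memory / (2000 * 1000000)))    # ceil(8.0) = 8 workers fit in memory
half_of_cores = int(math.ceil(8 / 2.0))                            # keep only half of the 8 cores = 4
print(half_of_cores if max_pool_size > half_of_cores else max_pool_size)  # -> 4, cores are the binding limit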
Example #12
def x_y_dataframe_split(dataframe, parameter, id=False):
    lo = utl.get_logger(inspect.stack()[0][3])

    lo.info("Dataframe split into X and y")
    X = dataframe.drop([parameter["id_field"], parameter["target_field"]],
                       axis=1)
    y = pd.np.array(dataframe[parameter["target_field"]], dtype='int')
    if id:
        row_id = dataframe[parameter["id_field"]]
        return X, y, row_id
    else:
        return X, y
Example #13
def max_estimators_fit_duration(X,
                                y,
                                max_classifier_time_budget,
                                logger,
                                sample_factor=1):
    lo = utl.get_logger(inspect.stack()[0][3])

    lo.info("Constructing preprocessor pipeline and transforming sample data")
    # we don't care about the data here but need to preprocess, otherwise the classifiers crash

    pipeline = SimpleClassificationPipeline(include={
        'imputation': ['most_frequent'],
        'rescaling': ['standardize']
    })
    default_cs = pipeline.get_hyperparameter_search_space(
    ).get_default_configuration()
    pipeline = pipeline.set_hyperparameters(default_cs)

    pipeline.fit(X, y)
    X_tr, dummy = pipeline.fit_transformer(X, y)

    lo.info("Running estimators on the sample")
    # going over all default classifiers used by auto-sklearn
    clfs = autosklearn.pipeline.components.classification._classifiers

    processes = []
    with multiprocessing.Manager() as manager:
        max_clf_time = manager.Value('i', 3)  # default 3 sec
        for clf_name, clf_class in clfs.items():
            pr = multiprocessing.Process(target=time_single_estimator,
                                         name=clf_name,
                                         args=(clf_name, clf_class, X_tr, y,
                                               max_clf_time, logger))
            pr.start()
            processes.append(pr)
        for pr in processes:
            pr.join(max_classifier_time_budget
                    )  # will block for max_classifier_time_budget or
            # until the classifier fit process finishes. After max_classifier_time_budget
            # we will terminate all still running processes here.
            if pr.is_alive():
                logger.info("Terminating " + pr.name +
                            " process due to timeout")
                pr.terminate()
        result_max_clf_time = max_clf_time.value

    lo.info("Test classifier fit completed")

    per_run_time_limit = int(sample_factor * result_max_clf_time)
    return max_classifier_time_budget if per_run_time_limit > max_classifier_time_budget else per_run_time_limit
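
The join/terminate logic above is the general "give every worker at most N seconds" pattern; stripped of the auto-sklearn specifics it reduces to roughly the sketch below, where slow_task is purely illustrative:

import multiprocessing
import time

def slow_task(seconds):
    time.sleep(seconds)              # stand-in for a classifier fit

if __name__ == '__main__':
    budget = 2                       # seconds, analogous to max_classifier_time_budget
    workers = [multiprocessing.Process(target=slow_task, args=(s,)) for s in (1, 5)]
    for w in workers:
        w.start()
    for w in workers:
        w.join(budget)               # returns when the worker exits or the budget elapses
        if w.is_alive():             # still running -> it exceeded its budget
            w.terminate()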
Example #14
def log_from_general() -> None:
    """Showcase a simple log with different levels."""
    # make sure the name matches the customization
    logger = utility.get_logger("LogDemo")

    message = "from LogDemo"
    logger.debug(f"DEBUG - {message}")
    logger.info(f"INFO - {message}")
    logger.warning(f"WARNING - {message}", exc_info=True)
    logger.exception(f"ERROR - {message}")
    logger.error(f"ERROR - {message}", exc_info=True)  # same as `exception()`
    logger.critical(f"CRITICAL - {message}", exc_info=True)

    return
Example #15
    def __init__(self):
        my_jid = JID(USER+'/Bot')
        self.my_jid = my_jid
        settings = XMPPSettings({
                            "software_name": "Clubot",
                            "software_version": __version__,
                            "software_os": "Linux",
                            "tls_verify_peer": False,
                            "starttls": True,
                            "ipv6":False,
                            "poll_interval": 10,
                            })

        settings["password"] = PASSWORD
        version_provider = VersionProvider(settings)
        self.connected = False
        mainloop = TornadoMainLoop(settings)
        self.client = Client(my_jid, [self, version_provider], settings, mainloop)
        #self.client = Client(my_jid, [self, version_provider], settings)
        self.logger = get_logger()
        self.trytimes = 0
        self.sended = []
        Logics.empty_status()
Example #16
    def __init__(self):
        my_jid = JID(USER + '/Bot')
        self.my_jid = my_jid
        settings = XMPPSettings({
            "software_name": "Clubot",
            "software_version": __version__,
            "software_os": "Linux",
            "tls_verify_peer": False,
            "starttls": True,
            "ipv6": False,
            "poll_interval": 10,
        })

        settings["password"] = PASSWORD
        version_provider = VersionProvider(settings)
        self.connected = False
        mainloop = TornadoMainLoop(settings)
        self.client = Client(my_jid, [self, version_provider], settings,
                             mainloop)
        #self.client = Client(my_jid, [self, version_provider], settings)
        self.logger = get_logger()
        self.trytimes = 0
        self.sended = []
        Logics.empty_status()
Example #17
# Training #
############

if save_checkpts or restore: saver = tf.train.Saver(max_to_keep=10)
    
checkpoint_dir = checkpoints_directory + model_name + '/'
tensorboard_dir = tensorboard_directory + model_name + '/'

if save_checkpts and not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
if log_in_tb and not os.path.exists(tensorboard_dir):
    os.makedirs(tensorboard_dir)
if log_in_file:
    if not os.path.exists(logging_directory):
        os.makedirs(logging_directory)
    logger = get_logger(model_name, logging_directory)
    logging = logger.info
else:
    logging = print
    
logging("Current model: \n\t{}".format(model_name))

config = tf.ConfigProto()
config.gpu_options.allow_growth=True
with tf.Session(config=config) as sess: 
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    
    # Recover previous work
    ckpt = tf.train.get_checkpoint_state(os.path.dirname(checkpoint_dir + 'checkpoint'))
    if restore and ckpt and ckpt.model_checkpoint_path:
Example #18
This module defines the Article abstract base class along with
many specific classes used to store articles scraped from various
newspapers of interest.

Supported newspapers in alphabetical order:

La Repubblica (LaRepubblicaArticle)

"""

import abc
from parser import get_page_html
from bs4 import BeautifulSoup
from utility import get_logger

LOGGER = get_logger('article')


class ArticleException(Exception):
    """Custom exception for the Article class.

    Attributes
    ----------
    msg: str
        Human readable string describing the exception.

    """
    def __init__(self, msg):
        """Initializer for the ExtractorException class.

        Parameters
Example #19
                              output_input, video_info))
            while (1):
                input = output_output.recv()
                if input[0] == 'output':
                    end_time = time.time()
                    elapsed_time = end_time - start_time
                    fps = segment_fps * segment_size / (end_time - start_time)
                    print(
                        'overall [elapsed], resolution [{}p] : {} second, {} fps'
                        .format(resolution, elapsed_time, fps))
                    elapsed_time_list[resolution].append(elapsed_time)
                    fps_list[resolution].append(fps)
                    break
                else:
                    print('request: Invalid input')
                    break

    #print statistics
    runtimeLogger = util.get_logger(opt.result_dir, 'result_video_runtime.log')
    for resolution in resolution_list:
        print('[{}p]: minimum {} fps, average {} fps, maximum {} fps'.format(
            resolution, np.min(fps_list[resolution]),
            np.average(fps_list[resolution]), np.max(fps_list[resolution])))
        log_str = "\t".join(map(str, fps_list[resolution]))
        runtimeLogger.info(log_str)

    #terminate processes
    sr_process.terminate()
    decode_process.terminate()
    encode_process.terminate()
Example #20
def sync_search(device, dir='experiment'):
    dir = os.path.join(
        dir,
        utility.cleanText(f"rLut-{args.rLUT}_rThroughput-{args.rThroughput}"))
    if os.path.exists(dir) is False:
        os.makedirs(dir)
    filepath = os.path.join(
        dir, utility.cleanText(f"joint_{args.episodes}-episodes"))
    logger = utility.get_logger(filepath)
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    tb_writer = SummaryWriter(filepath)

    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'joint'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"seed: \t\t\t\t {args.seed}")
    logger.info(f"gpu: \t\t\t\t {args.gpu}")
    logger.info(f"include batchnorm: \t\t\t {args.batchnorm}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    for name, value in ARCH_SPACE.items():
        logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")

    agent = Agent({
        **ARCH_SPACE,
        **QUAN_SPACE
    },
                  args.layers,
                  lr=args.learning_rate,
                  device=torch.device('cpu'),
                  skip=args.skip)

    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)

    input_shape, num_classes = data.get_info(args.dataset)
    ## (3,32,32) -> (1,3,32,32) add batch dimension
    sample_input = utility.get_sample_input(device, input_shape)

    writer.writerow(["ID"] +
                    ["Layer {}".format(i)
                     for i in range(args.layers)] + ["Accuracy"] + [
                         "Partition (Tn, Tm)", "Partition (#LUTs)",
                         "Partition (#cycles)", "Total LUT", "Total Throughput"
                     ] + ["Time"])

    arch_id, total_time = 0, 0
    best_reward = float('-inf')

    logger.info('=' * 50 +
                "Start exploring architecture & quantization space" + '=' * 50)
    best_samples = BestSamples(5)

    for e in range(args.episodes):
        logger.info('-' * 130)
        arch_id += 1
        start = time.time()
        rollout, paras = agent.rollout()
        logger.info("Sample Architecture ID: {}, Sampled actions: {}".format(
            arch_id, rollout))
        arch_paras, quan_paras = utility.split_paras(paras)

        fpga_model = FPGAModel(rLUT=args.rLUT,
                               rThroughput=args.rThroughput,
                               arch_paras=arch_paras,
                               quan_paras=quan_paras)

        if fpga_model.validate():

            model, optimizer = child.get_model(input_shape,
                                               arch_paras,
                                               num_classes,
                                               device,
                                               multi_gpu=args.multi_gpu,
                                               do_bn=args.batchnorm)

            if args.verbosity > 1:
                print(model)
                torchsummary.summary(model, input_shape)

            if args.adapt:
                num_w = utility.get_net_param(model)
                macs = utility.get_net_macs(model, sample_input)
                tb_writer.add_scalar('num_param', num_w, arch_id)
                tb_writer.add_scalar('macs', macs, arch_id)
                if args.verbosity > 1:
                    print(f"# of param: {num_w}, macs: {macs}")

            _, val_acc = backend.fit(model,
                                     optimizer,
                                     train_data,
                                     val_data,
                                     quan_paras=quan_paras,
                                     epochs=args.epochs,
                                     verbosity=args.verbosity)
        else:
            val_acc = 0

        if args.adapt:
            ## TODO: how to make arch_reward function with macs and latency?
            arch_reward = val_acc
        else:
            arch_reward = val_acc

        agent.store_rollout(rollout, arch_reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_samples.register(arch_id, rollout, arch_reward)

        tb_writer.add_scalar('val_acc', val_acc, arch_id)
        tb_writer.add_scalar('arch_reward', arch_reward, arch_id)

        if arch_reward > best_reward:
            best_reward = arch_reward
            tb_writer.add_scalar('best_reward', best_reward, arch_id)
            tb_writer.add_graph(model.eval(), (sample_input, ))

        writer.writerow([arch_id] +
                        [str(paras[i])
                         for i in range(args.layers)] + [arch_reward] +
                        list(fpga_model.get_info()) + [ep_time])
        logger.info(f"Reward: {arch_reward}, " + f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e+1)}")
        logger.info(f"Best Reward: {best_samples.reward_list[0]}, " +
                    f"ID: {best_samples.id_list[0]}, " +
                    f"Rollout: {best_samples.rollout_list[0]}")
    logger.info('=' * 50 +
                "Architecture & quantization sapce exploration finished" +
                '=' * 50)
    logger.info(f"Total elasped time: {total_time}")
    logger.info(f"Best samples: {best_samples}")
    tb_writer.close()
    csvfile.close()
Example #21
    def evaluate_quality(self):
        #summary log
        log_name = datetime.now().strftime(
            'result_quality_summary_{}.log'.format(opt.test_num_epoch))
        summary_logger = util.get_logger(opt.result_dir, log_name)
        log = ''
        log += 'outputIdx\t'
        for lr in opt.dash_lr:
            log += 'PSNR(SR, {}p)\t'.format(lr)
            log += 'PSNR(bicubic, {}p)\t'.format(lr)
            log += '\t'
            log += 'SSIM(SR, {}p)\t'.format(lr)
            log += 'SSIM(bicubic, {}p)\t'.format(lr)
            log += '\t'
        summary_logger.info(log)

        #detail log (per frame)
        detail_logger = {}
        for output_node in self.output_nodes:
            detaill_logname = datetime.now().strftime(
                'result_quality_detail_{}_{}.log'.format(
                    output_node, opt.test_num_epoch))
            detail_logger[output_node] = util.get_logger(
                opt.result_dir, detaill_logname)

        for output_node in self.output_nodes:
            log = ''
            log += 'FrameIdx\t'
            for lr in self.node2res[output_node]:
                log += 'PSNR(SR, {}p)\t'.format(lr)
                log += 'PSNR(bicubic, {}p)\t'.format(lr)
                log += '\t'
                log += 'SSIM(SR, {}p)\t'.format(lr)
                log += 'SSIM(bicubic, {}p)\t'.format(lr)
                log += '\t'
            detail_logger[output_node].info(log)

        #analyze
        baseline_result = self._analyze_baseline()
        sr_result = {}
        for output_node in self.output_nodes:
            sr_result[output_node] = self._analyze_sr(output_node)

        #logging
        for output_node in self.output_nodes:
            #analyze
            log = ''
            log += '{}\t'.format(output_node)
            for lr in opt.dash_lr:
                if lr in self.node2res[output_node]:
                    log += '{}\t'.format(
                        np.mean(sr_result[output_node][lr].psnr))
                    log += '{}\t'.format(np.mean(baseline_result[lr].psnr))
                    log += '\t'
                    log += '{}\t'.format(
                        np.mean(sr_result[output_node][lr].ssim))
                    log += '{}\t'.format(np.mean(baseline_result[lr].ssim))
                    log += '\t'
                else:
                    log += '\t'
                    log += '\t'
                    log += '\t'
                    log += '\t'
                    log += '\t'
                    log += '\t'
            summary_logger.info(log)

            for idx in range(len(self.dataset)):
                log = ''
                log += '{}\t'.format(idx)
                for lr in opt.dash_lr:
                    if lr in self.node2res[output_node]:
                        log += '{}\t'.format(
                            sr_result[output_node][lr].psnr[idx])
                        log += '{}\t'.format(baseline_result[lr].psnr[idx])
                        log += '\t'
                        log += '{}\t'.format(
                            sr_result[output_node][lr].ssim[idx])
                        log += '{}\t'.format(baseline_result[lr].ssim[idx])
                        log += '\t'
                    else:
                        log += '\t'
                        log += '\t'
                        log += '\t'
                        log += '\t'
                        log += '\t'
                        log += '\t'
                detail_logger[output_node].info(log)
Example #22
 def __init__(self, message_bus):
     self._message_bus = message_bus   # message bus
     self._logger = get_logger()       # logger
     self._http_stream = TornadoHTTPClient()
     self._honor = Honor()
Example #23
 def __init__(self, message_bus):
     self._message_bus = message_bus  # message bus
     self._logger = get_logger()  # logger
     self._http_stream = HTTPStream.instance()
Example #24
if __name__ == "__main__":

    with open(CONFIG) as file:
        configuration = json.load(file)

    Database_module, db_configuration = parse_dbconfig(configuration)

    ## reading logging configuration
    logging_configuration = configuration["logging"]
    log_folder = logging_configuration["output"]

    if not log_folder in os.listdir('.'):
        os.mkdir(log_folder)

    logger = get_logger(__name__, log_level, log_stream, log_folder)
    ## logger for main thread

    ## logger test in main thread
    print_start(logger)
    logger.info("Application started , Extracting all the plugins")

    ## handles creating multiple processes
    ## from a single process using MultiProcessing

    import_list = configuration["plugins"]

    with MultiProcessingContext(log_level, log_stream, log_folder) as execute:

        for attr in import_list:
            path = attr["filename"]
Example #25
 def __init__(self, message_bus):
     self._message_bus = message_bus  # message bus
     self._logger = get_logger()  # logger
     self._http_stream = TornadoHTTPClient()
     self._honor = Honor()
Example #26
La Repubblica (LaRepubblicaExtractor)

"""

import re
import abc
import csv
from parser import get_page_html
from datetime import date, timedelta
from tqdm import tqdm
from bs4 import BeautifulSoup
from article import ArticleException, LaRepubblicaArticle
from utility import datespan, get_logger

LOGGER = get_logger('extractor')


class ExtractorException(Exception):
    """Custom exception for the Extractor class.

    Attributes
    ----------
    msg: str
        Human readable string describing the exception.

    """
    def __init__(self, msg):
        """Initializer for the ExtractorException class.

        Parameters
Example #27
def quantization_search(device, dir='experiment'):
    dir = os.path.join(
        dir,
        utility.cleanText(f"rLut-{args.rLUT}_rThroughput-{args.rThroughput}"))
    if os.path.exists(dir) is False:
        os.makedirs(dir)
    filepath = os.path.join(
        dir, utility.cleanText(f"quantization_{args.episodes}-episodes"))
    logger = utility.get_logger(filepath)
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'quantization'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"seed: \t\t\t\t {args.seed}")
    logger.info(f"gpu: \t\t\t\t {args.gpu}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"include batchnorm: \t\t\t {args.batchnorm}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    # for name, value in ARCH_SPACE.items():
    #     logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")
    agent = Agent(QUAN_SPACE,
                  args.layers,
                  lr=args.learning_rate,
                  device=torch.device('cpu'),
                  skip=False)
    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)
    input_shape, num_classes = data.get_info(args.dataset)
    writer.writerow(["ID"] +
                    ["Layer {}".format(i)
                     for i in range(args.layers)] + ["Accuracy"] + [
                         "Partition (Tn, Tm)", "Partition (#LUTs)",
                         "Partition (#cycles)", "Total LUT", "Total Throughput"
                     ] + ["Time"])
    child_id, total_time = 0, 0
    logger.info('=' * 50 + "Start exploring quantization space" + '=' * 50)
    best_samples = BestSamples(5)
    A1 = [{
        'filter_height': 3,
        'filter_width': 3,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 7,
        'filter_width': 5,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 48,
        'pool_size': 1
    }, {
        'filter_height': 5,
        'filter_width': 5,
        'stride_height': 2,
        'stride_width': 1,
        'num_filters': 48,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 5,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 5,
        'filter_width': 7,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 36,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 1,
        'stride_height': 1,
        'stride_width': 2,
        'num_filters': 64,
        'pool_size': 2
    }]
    A2 = [{
        'filter_height': 3,
        'filter_width': 3,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 24,
        'pool_size': 1
    }, {
        'filter_height': 5,
        'filter_width': 5,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 36,
        'pool_size': 1
    }, {
        'filter_height': 5,
        'filter_width': 5,
        'stride_height': 2,
        'stride_width': 1,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 5,
        'filter_width': 5,
        'stride_height': 1,
        'stride_width': 1,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 5,
        'filter_width': 5,
        'stride_height': 1,
        'stride_width': 2,
        'num_filters': 24,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 3,
        'stride_height': 1,
        'stride_width': 2,
        'num_filters': 64,
        'pool_size': 1
    }]

    B1 = [{
        'filter_height': 3,
        'filter_width': 3,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 5,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 3,
        'num_filters': 64,
        'pool_size': 2
    }, {
        'filter_height': 5,
        'filter_width': 5,
        'num_filters': 64,
        'pool_size': 2
    }, {
        'filter_height': 5,
        'filter_width': 3,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 7,
        'filter_width': 7,
        'num_filters': 64,
        'pool_size': 1
    }]

    B2 = [{
        'filter_height': 5,
        'filter_width': 3,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 5,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 3,
        'filter_width': 5,
        'num_filters': 64,
        'pool_size': 2
    }, {
        'filter_height': 5,
        'filter_width': 5,
        'num_filters': 64,
        'pool_size': 2
    }, {
        'filter_height': 5,
        'filter_width': 3,
        'num_filters': 64,
        'pool_size': 1
    }, {
        'filter_height': 7,
        'filter_width': 7,
        'num_filters': 64,
        'pool_size': 1
    }]

    arch_paras = B2
    model, optimizer = child.get_model(input_shape,
                                       arch_paras,
                                       num_classes,
                                       device,
                                       multi_gpu=args.multi_gpu,
                                       do_bn=False)
    _, val_acc = backend.fit(model,
                             optimizer,
                             train_data=train_data,
                             val_data=val_data,
                             epochs=args.epochs,
                             verbosity=args.verbosity)
    print(val_acc)
    for e in range(args.episodes):
        logger.info('-' * 130)
        child_id += 1
        start = time.time()
        quan_rollout, quan_paras = agent.rollout()
        logger.info("Sample Quantization ID: {}, Sampled actions: {}".format(
            child_id, quan_rollout))
        fpga_model = FPGAModel(rLUT=args.rLUT,
                               rThroughput=args.rThroughput,
                               arch_paras=arch_paras,
                               quan_paras=quan_paras)
        if fpga_model.validate():
            _, reward = backend.fit(model,
                                    optimizer,
                                    val_data=val_data,
                                    quan_paras=quan_paras,
                                    epochs=1,
                                    verbosity=args.verbosity)
        else:
            reward = 0
        agent.store_rollout(quan_rollout, reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_samples.register(child_id, quan_rollout, reward)
        writer.writerow([child_id] +
                        [str(quan_paras[i]) for i in range(args.layers)] +
                        [reward] + list(fpga_model.get_info()) + [ep_time])
        logger.info(f"Reward: {reward}, " + f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e+1)}")
        logger.info(f"Best Reward: {best_samples.reward_list[0]}, " +
                    f"ID: {best_samples.id_list[0]}, " +
                    f"Rollout: {best_samples.rollout_list[0]}")
    logger.info('=' * 50 + "Quantization sapce exploration finished" +
                '=' * 50)
    logger.info(f"Total elasped time: {total_time}")
    logger.info(f"Best samples: {best_samples}")
    csvfile.close()
Example #28
def run():
    args = parse_args()
    # Seed the random number generators so the experiment results can be reproduced
    start_epoch = 1
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.device != -1:
        torch.cuda.manual_seed(args.seed)
    device = torch.device(f'cuda:{args.device}' if torch.cuda.is_available()
                          and args.device >= 0 else 'cpu')
    if torch.cuda.is_available() and args.device >= 0:
        # Enabling this flag requires the input dimensions to stay fixed; otherwise
        # cuDNN re-tunes for every new shape, which actually costs more time.
        # The RNN inputs are padded to a fixed length now, so the CNN variants can enable it.
        if args.arch in ['stack', 'multi', 'stack_multi']:
            torch.backends.cudnn.benchmark = True
    # Output directory
    if args.resume_snapshot:
        # Check that the snapshot file exists
        assert os.path.exists(
            args.resume_snapshot), f'{args.resume_snapshot} does not exist!'
        model_dir, model_file = os.path.split(args.resume_snapshot)
        output_dir, _ = os.path.split(model_dir)
    else:
        base_dir = time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime())
        output_dir = os.path.join(args.out_dir, base_dir)
        model_dir = os.path.join(output_dir, 'save_model')
        os.makedirs(output_dir)  # create the output root directory
        os.makedirs(model_dir)
    # Log the arguments
    logger = get_logger(output_dir)
    logger.info(pprint.pformat(vars(args)))
    logger.info(f'output dir is {output_dir}')
    # Load the dataset
    train_dataset, dev_dataset, test_dataset, vocab, vectors = get_dataset(
        args, logger)
    vectors_dim = 300 if vectors is None else vectors.size(1)
    # Create the data iterators
    train_loader = torchtext.data.BucketIterator(train_dataset,
                                                 args.batch_size,
                                                 device=device,
                                                 train=True,
                                                 shuffle=True,
                                                 sort=False,
                                                 repeat=False)
    dev_loader = torchtext.data.BucketIterator(dev_dataset,
                                               args.batch_size,
                                               device=device,
                                               train=False,
                                               shuffle=False,
                                               sort=False,
                                               repeat=False)
    test_loader = torchtext.data.BucketIterator(test_dataset,
                                                args.batch_size,
                                                device=device,
                                                train=False,
                                                shuffle=False,
                                                sort=False,
                                                repeat=False)
    # Build the model, optimizer, and loss function
    if args.arch == 'stack':
        model = StackCNN(vocab_size=len(vocab),
                         embed_dim=vectors_dim,
                         embed_weight=vectors,
                         kernel_sizes=args.stack_kernel_sizes,
                         out_channels=args.stack_out_channels).to(device)
    elif args.arch == 'multi':
        model = MultiCNN(vocab_size=len(vocab),
                         embed_dim=vectors_dim,
                         embed_weight=vectors,
                         kernel_sizes=args.multi_kernel_sizes,
                         out_channels=args.multi_out_channels).to(device)
    elif args.arch == 'stack_multi':
        model = StackMultiCNN(
            vocab_size=len(vocab),
            embed_dim=vectors_dim,
            embed_weight=vectors,
            stack_kernel_sizes=args.stack_kernel_sizes,
            stack_out_channels=args.stack_out_channels,
            multi_kernel_sizes=args.multi_kernel_sizes,
            multi_out_channels=args.multi_out_channels).to(device)
    elif args.arch == 'bigru':
        assert args.hidden_size.find(
            ',') == -1, '--hidden-size must be an int for BiLSTM/BiGRU model'
        hidden_size = int(args.hidden_size)
        model = BiGRU(vocab_size=len(vocab),
                      embedding_dim=vectors_dim,
                      hidden_size=hidden_size,
                      dropout_r=args.dropout,
                      embed_weight=vectors).to(device)
    elif args.arch == 'bigru_cnn':
        assert args.hidden_size.find(
            ',') == -1, '--hidden-size must be an int for BiLSTM/BiGRU model'
        hidden_size = int(args.hidden_size)
        model = BiGRUCNN(vocab_size=len(vocab),
                         embedding_dim=vectors_dim,
                         hidden_size=hidden_size,
                         cnn_channel=args.cnn_channel,
                         dropout_r=args.dropout,
                         embed_weight=vectors).to(device)
    # elif args.arch == 'norm_stack_multi':
    #     model = NormStackMultiCNN(vocab_size=len(vocab), embed_dim=vectors_dim, sent_length=args.fix_length,
    #                               embed_weight=vectors).to(device)
    # elif args.arch == 'stack_multi_atten':
    #     model = QA_StackMultiAttentionCNN(vocab_size=len(vocab), embed_dim=vectors_dim, embed_weight=vectors).to(
    #         device)
    # elif args.arch == 'ap_stack_multi':
    #     model = QA_AP_StackMultiCNN(vocab_size=len(vocab), embed_dim=vectors_dim, embed_weight=vectors).to(
    #         device)
    # elif args.arch == 'bilstm':
    #     assert args.hidden_size.find(',') == -1, '--hidden-size must be a int for LSTM model'
    #     hidden_size = int(args.hidden_size)
    #     model = BiLSTM(vocab_size=len(vocab), embedding_dim=vectors_dim, hidden_size=hidden_size,
    #                    dropout_r=args.dropout, embed_weight=vectors).to(device)
    # elif args.arch == 'stack_bilstm':
    #     hidden_size = [int(i) for i in args.hidden_size.split(',')]
    #     model = StackBiLSTM(vocab_size=len(vocab), embedding_dim=vectors_dim, hidden_size=hidden_size,
    #                         mlp_d=args.mlp_d, dropout_r=args.dropout, embed_weight=vectors).to(device)
    # elif args.arch == 'bigru':
    #     assert args.hidden_size.find(',') == -1, '--hidden-size must be a int for BiLSTM/BiGRU model'
    #     hidden_size = int(args.hidden_size)
    #     model = BiGRU(vocab_size=len(vocab), embedding_dim=vectors_dim, hidden_size=hidden_size,
    #                   dropout_r=args.dropout, embed_weight=vectors).to(device)
    # elif args.arch == 'stack_bigru':
    #     hidden_size = [int(i) for i in args.hidden_size.split(',')]
    #     model = StackBiGRU(vocab_size=len(vocab), embedding_dim=vectors_dim, hidden_size=hidden_size,
    #                        mlp_d=args.mlp_d,
    #                        sent_max_length=args.fix_length, dropout_r=args.dropout, embed_weight=vectors).to(device)
    else:
        raise ValueError("--arch is unknown")
    # Choose the optimizer configured for this model
    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optimizer == 'adagrad':
        optimizer = torch.optim.Adagrad(model.parameters(), lr=args.lr)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    else:
        raise ValueError("--optimizer is unknown")
    loss_fn = torch.nn.MarginRankingLoss(margin=args.margin)
    architecture = model.__class__.__name__
    # Load a previously trained snapshot
    if args.resume_snapshot:
        state = torch.load(args.resume_snapshot)
        model.load_state_dict(state['model'])
        optimizer.load_state_dict(state['optimizer'])
        epoch = state['epoch']
        start_epoch = state['epoch'] + 1
        if 'best_dev_score' in state:
            # Stay compatible with checkpoints saved by older versions
            dev_acc = state['best_dev_score']
            test_acc = 0
        else:
            dev_acc = state['dev_accuracy']
            test_acc = state['test_accuracy']
        logger.info(
            f"load state {args.resume_snapshot}, dev accuracy {dev_acc}, test accuracy {test_acc}"
        )
    # Record the arguments
    with open(f'{output_dir}/arguments.csv', 'a') as f:
        for k, v in vars(args).items():
            f.write(f'{k},{v}\n')
    # Write logs to TensorBoard
    writer = SummaryWriter(output_dir)
    # Record the model's computation graph
    try:
        q = torch.randint_like(torch.Tensor(1, args.fix_length),
                               2,
                               100,
                               dtype=torch.long)
        ql = torch.Tensor([args.fix_length]).type(torch.int)
        writer.add_graph(model, ((q, ql), (q, ql)))
    except Exception as e:
        logger.error("Failed to save model graph: {}".format(e))
        # exit()
    # Start training
    best_dev_score = -1  # track the best dev result
    best_test_score = -1  # track the best test result
    prev_loss = 0
    # Automatic learning-rate scheduling
    # TODO: disabled for now; Adam already adapts the learning rate
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=args.lr_reduce_factor,
    #                                                        patience=args.patience, verbose=True)
    if not args.skip_training:
        for epoch in range(start_epoch, start_epoch + args.epochs):
            start_time = time.time()
            # train epoch
            loss = train_epoch(epoch, train_loader, model, optimizer, loss_fn,
                               device)
            writer.add_scalar('train/loss', loss, epoch)
            logger.info(f'Train Epoch {epoch}: loss={loss}')
            # evaluate
            dev_accuracy = evaluate(dev_loader, model, 1)
            logger.info(
                f'Evaluation metrics: dev accuracy = {100. * dev_accuracy}%')
            writer.add_scalar('dev/lr', optimizer.param_groups[0]['lr'], epoch)
            writer.add_scalar('dev/acc', dev_accuracy, epoch)
            # Run the test set
            test_accuracy = evaluate(test_loader, model, 1)
            logger.info(
                f'Evaluation metrics: test accuracy = {100. * test_accuracy}%'
            )
            writer.add_scalar('test/acc', test_accuracy, epoch)

            # Save the model
            save_state = {
                'epoch': epoch,
                'dev_accuracy': dev_accuracy,
                'test_accuracy': test_accuracy,
                'architecture': architecture,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(save_state,
                       f'{model_dir}/{architecture}_epoch_{epoch}.pth')
            logger.info(
                'Save model: epoch {}, dev accuracy {}, test accuracy {}'.
                format(epoch, dev_accuracy, test_accuracy))
            # Measure the epoch runtime
            duration = time.time() - start_time
            logger.info('Epoch {} finished in {:.2f} minutes'.format(
                epoch, duration / 60))

            if abs(prev_loss - loss) <= args.early_stopping:
                logger.info(
                    'Early stopping. Loss changed by less than {}.'.format(
                        args.early_stopping))
                break
            prev_loss = loss
    else:
        # Evaluation only (no training)
        dev_accuracies = evaluate(dev_loader, model, args.topk)
        for k in args.topk:
            logger.info(
                f'Evaluation metrics: top-{k} dev accuracy = {dev_accuracies[k]}%'
            )

        test_accuracies = evaluate(test_loader, model, args.topk)
        for k in args.topk:
            logger.info(
                f'Evaluation metrics: top-{k} test accuracy = {test_accuracies[k]}%'
            )
Example #29
def nested_search(device, dir='experiment'):
    dir = os.path.join(
        dir,
        utility.cleanText(f"rLut-{args.rLUT}_rThroughput-{args.rThroughput}"))
    if os.path.exists(dir) is False:
        os.makedirs(dir)
    filepath = os.path.join(
        dir, utility.cleanText(f"nested_{args.episodes}-episodes"))
    logger = utility.get_logger(filepath)
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'nested'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"seed: \t\t\t\t {args.seed}")
    logger.info(f"gpu: \t\t\t\t {args.gpu}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"include batchnorm: \t\t\t {args.batchnorm}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes1}")
    logger.info(f"quantization episodes: \t\t\t {args.episodes2}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    for name, value in ARCH_SPACE.items():
        logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")
    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)
    input_shape, num_classes = data.get_info(args.dataset)
    writer.writerow(["ID"] +
                    ["Layer {}".format(i)
                     for i in range(args.layers)] + ["Accuracy"] + [
                         "Partition (Tn, Tm)", "Partition (#LUTs)",
                         "Partition (#cycles)", "Total LUT", "Total Throughput"
                     ] + ["Time"])
    arch_agent = Agent(ARCH_SPACE,
                       args.layers,
                       lr=args.learning_rate,
                       device=torch.device('cpu'),
                       skip=args.skip)
    arch_id, total_time = 0, 0
    logger.info('=' * 50 + "Start exploring architecture space" + '=' * 50)
    best_arch = BestSamples(5)
    for e1 in range(args.episodes1):
        logger.info('-' * 130)
        arch_id += 1
        start = time.time()
        arch_rollout, arch_paras = arch_agent.rollout()
        logger.info("Sample Architecture ID: {}, Sampled arch: {}".format(
            arch_id, arch_rollout))
        model, optimizer = child.get_model(input_shape,
                                           arch_paras,
                                           num_classes,
                                           device,
                                           multi_gpu=args.multi_gpu,
                                           do_bn=args.batchnorm)
        backend.fit(model,
                    optimizer,
                    train_data,
                    val_data,
                    epochs=args.epochs,
                    verbosity=args.verbosity)
        quan_id = 0
        best_quan_reward = -1
        logger.info('=' * 50 + "Start exploring quantization space" + '=' * 50)
        quan_agent = Agent(QUAN_SPACE,
                           args.layers,
                           lr=args.learning_rate,
                           device=torch.device('cpu'),
                           skip=False)
        for e2 in range(args.episodes2):
            quan_id += 1
            quan_rollout, quan_paras = quan_agent.rollout()
            fpga_model = FPGAModel(rLUT=args.rLUT,
                                   rThroughput=args.rThroughput,
                                   arch_paras=arch_paras,
                                   quan_paras=quan_paras)
            if fpga_model.validate():
                _, quan_reward = backend.fit(model,
                                             optimizer,
                                             val_data=val_data,
                                             quan_paras=quan_paras,
                                             epochs=1,
                                             verbosity=args.verbosity)
            else:
                quan_reward = 0
            logger.info(
                "Sample Quantization ID: {}, Sampled Quantization: {}, reward: {}"
                .format(quan_id, quan_rollout, quan_reward))
            quan_agent.store_rollout(quan_rollout, quan_reward)
            if quan_reward > best_quan_reward:
                best_quan_reward = quan_reward
                best_quan_rollout, best_quan_paras = quan_rollout, quan_paras
        logger.info('=' * 50 + "Quantization space exploration finished" +
                    '=' * 50)
        arch_reward = best_quan_reward
        arch_agent.store_rollout(arch_rollout, arch_reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_arch.register(
            arch_id,
            utility.combine_rollout(arch_rollout, best_quan_rollout,
                                    args.layers), arch_reward)
        writer.writerow([arch_id] + [
            str(arch_paras[i]) + '\n' + str(best_quan_paras[i])
            for i in range(args.layers)
        ] + [arch_reward] + list(fpga_model.get_info()) + [ep_time])
        logger.info(f"Reward: {arch_reward}, " + f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e1+1)}")
        logger.info(f"Best Reward: {best_arch.reward_list[0]}, " +
                    f"ID: {best_arch.id_list[0]}, " +
                    f"Rollout: {best_arch.rollout_list[0]}")
    logger.info('=' * 50 +
                "Architecture & quantization sapce exploration finished" +
                '=' * 50)
    logger.info(f"Total elasped time: {total_time}")
    logger.info(f"Best samples: {best_arch}")
    csvfile.close()
Example #30
 def __init__(self, message_bus):
     self._message_bus = message_bus   # message bus
     self._logger = get_logger()       # logger
     self._http_stream = HTTPStream.instance()
Example #31
parser = argparse.ArgumentParser(
    description='zero configuration predictive modeling script. Requires a pandas HDF5 dataframe file ' + \
                'and a yaml parameter file as input')
parser.add_argument(
    '-d',
    '--data_file',
    nargs=1,
    help=
    'input pandas HDF5 dataframe .h5 with a unique identifier and a target column\n'
    + 'as well as additional data columns\n'
    'default values are cust_id and category, or they need to be defined in an\n' +
    'optional parameter file ')
parser.add_argument('-p', '--param_file', help='input yaml parameter file')

args = parser.parse_args()
logger = utl.get_logger(os.path.basename(__file__))
logger.info("Program started with the following arguments:")
logger.info(args)

###########################################################
# set dir to project dir
###########################################################
abspath = os.path.abspath(__file__)
dname = os.path.dirname(os.path.dirname(abspath))
os.chdir(dname)

###########################################################
# file check for the parameter
###########################################################
param_file = ''
if args.param_file:
Example #32
from json import JSONDecodeError
from time import sleep

import requests
from pynamodb.exceptions import QueryError, PutError, VerboseClientError

from constants import DEFAULT_PARAMS, MARKET_GROUPS_URI, MARKET_TYPES_URI
from models.market_group import MarketGroup
from models.market_type import MarketType
from utility import get_logger

log = get_logger()

MarketGroup.create_table(read_capacity_units=1, write_capacity_units=1)
MarketType.create_table(read_capacity_units=1, write_capacity_units=1)

market_groups_response = requests.get(MARKET_GROUPS_URI, params=DEFAULT_PARAMS)
market_group_ids = market_groups_response.json()

for market_group_id in market_group_ids:
    market_group_exists = False
    try:
        market_group_exists = MarketGroup.count(market_group_id) != 0
    except (QueryError, KeyError) as e:
        pass

    market_group_uri = MARKET_GROUPS_URI + str(market_group_id) + '/'

    try:
        market_group_details = requests.get(market_group_uri,
                                            params=DEFAULT_PARAMS).json()
Example #33
# Name of the target
TARGET = 'target'

# Params
lgb_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'metric': METRIC,
    'num_threads': N_THREADS,
    'verbose': VERBOSE,
    'seed': SEED,
    'n_estimators': N_ESTIMATORS,
    'early_stopping_rounds': EARLY_STOPPING_ROUNDS
}

logger = utility.get_logger(LOGGER_NAME, MODEL_NUMBER, run_id, LOG_DIR)

utility.set_seed(SEED)
logger.info(f'Running for Model Number {MODEL_NUMBER}')

utility.update_tracking(run_id,
                        "model_number",
                        MODEL_NUMBER,
                        drop_incomplete_rows=True)
utility.update_tracking(run_id, "model_type", MODEL_TYPE)
utility.update_tracking(run_id, "is_test", IS_TEST)
utility.update_tracking(run_id, "n_estimators", N_ESTIMATORS)
utility.update_tracking(run_id, "early_stopping_rounds", EARLY_STOPPING_ROUNDS)
utility.update_tracking(run_id, "random_state", SEED)
utility.update_tracking(run_id, "n_threads", N_THREADS)
#utility.update_tracking(run_id, "learning_rate", LEARNING_RATE)
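
The excerpt does not show how lgb_params is consumed; a minimal, hypothetical LightGBM training call under these settings might look as follows, where X_train, y_train, X_valid and y_valid are assumed to exist:

import lightgbm as lgb

# Hypothetical fold; the excerpt above does not show how the data is split.
train_set = lgb.Dataset(X_train, label=y_train)
valid_set = lgb.Dataset(X_valid, label=y_valid, reference=train_set)

booster = lgb.train(
    lgb_params,                      # objective, metric, seed, n_estimators, early stopping, ...
    train_set,
    valid_sets=[train_set, valid_set],
    valid_names=['train', 'valid'])

logger.info(f'Best iteration: {booster.best_iteration}')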
Example #34
"""Provides the methods used for parsing HTML"""

import urllib3
import certifi
from utility import get_logger

HTTP = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())
LOGGER = get_logger('parser')


def get_page_html(url):
    """Obtains the HTML from a URL.

    Returns the raw HTML for a given web page URL.

    Parameters
    ----------
    url: str
        The URL of the web page that should be downloaded.

    Returns
    -------
    HTML
        The raw HTML of the downloaded webpage.
    """
    try:
        request = HTTP.request('GET', url)
        if request.status == 200:
            return request.data
        LOGGER.debug('ParserError: The request failed for %s', url)
        return None