def zeroconf_fit_ensemble(y, atsklrn_tempdir):
    """Build the final ensemble from models already trained into the shared folder.

    Runs auto-sklearn in shared mode with seed 1 (worker fits use seeds >= 2),
    so it picks up the models the parallel fit processes wrote to
    ``atsklrn_tempdir``.

    Parameters
    ----------
    y : array-like
        Target labels of the training set.
    atsklrn_tempdir : str
        auto-sklearn tmp/output folder shared with the fit processes.

    Returns
    -------
    AutoSklearnClassifier
        Classifier holding the fitted ensemble.

    Raises
    ------
    Exception
        Re-raises whatever ``fit_ensemble`` raises, after logging it.
    """
    # Fixed: the logger was re-fetched four times with the same name; fetch once.
    # Also switched eager string concatenation to lazy %-style logging args.
    lo = utl.get_logger(inspect.stack()[0][3])
    lo.info("Building ensemble")
    seed = 1  # reserved for the ensemble builder; workers use seeds >= 2
    ensemble = AutoSklearnClassifier(
        time_left_for_this_task=300,
        per_run_time_limit=150,
        ml_memory_limit=20240,
        ensemble_size=50,
        ensemble_nbest=200,
        shared_mode=True,
        tmp_folder=atsklrn_tempdir,
        output_folder=atsklrn_tempdir,
        delete_tmp_folder_after_terminate=False,
        delete_output_folder_after_terminate=False,
        initial_configurations_via_metalearning=0,
        seed=seed)
    lo.info("Done AutoSklearnClassifier - seed:%s", seed)
    try:
        lo.debug("Start ensemble.fit_ensemble - seed:%s", seed)
        ensemble.fit_ensemble(task=BINARY_CLASSIFICATION,
                              y=y,
                              metric=autosklearn.metrics.f1,
                              precision='32',
                              dataset_name='foobar',
                              ensemble_size=10,
                              ensemble_nbest=15)
    except Exception:
        lo.exception("Error in ensemble.fit_ensemble - seed:%s", seed)
        raise
    lo.debug("Done ensemble.fit_ensemble - seed:%s", seed)
    # Give auto-sklearn time to finish writing ensemble artifacts to disk.
    sleep(20)
    lo.info("Ensemble built - seed:%s", seed)
    lo.info("Show models - seed:%s", seed)
    for row in str(ensemble.show_models()).split("\n"):
        lo.info(row)
    return ensemble
def spawn_autosklearn_classifier(X_train, y_train, seed, dataset_name,
                                 time_left_for_this_task, per_run_time_limit,
                                 feat_type, memory_limit, atsklrn_tempdir):
    """Fit one AutoSklearnClassifier in shared mode (intended to run in a
    worker process; one worker per seed).

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels.
    seed : int
        Random seed; also used to stagger worker start-up.
    dataset_name : str
        Name passed through to auto-sklearn.
    time_left_for_this_task, per_run_time_limit : int
        Time budgets in seconds for the whole task / per model fit.
    feat_type : list
        Per-column feature types for auto-sklearn.
    memory_limit : int
        Per-model memory limit (MB).
    atsklrn_tempdir : str
        Shared tmp/output folder so the ensemble builder can see the models.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Re-raises construction or fit failures after logging them.
    """
    # Fixed: the original wrapped clf.fit in two nested try/except blocks that
    # logged the same failure twice (lo.exception in both, plus a redundant
    # traceback.print_exc). One handler per operation is enough; both re-raise.
    lo = utl.get_logger(inspect.stack()[0][3])
    try:
        lo.info("Start AutoSklearnClassifier seed=%s", seed)
        clf = AutoSklearnClassifier(
            time_left_for_this_task=time_left_for_this_task,
            per_run_time_limit=per_run_time_limit,
            ml_memory_limit=memory_limit,
            shared_mode=True,
            tmp_folder=atsklrn_tempdir,
            output_folder=atsklrn_tempdir,
            delete_tmp_folder_after_terminate=False,
            delete_output_folder_after_terminate=False,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,  # workers only train models; the ensemble is built separately
            seed=seed)
    except Exception:
        lo.exception("Exception AutoSklearnClassifier seed=%s", seed)
        raise
    lo.info("Done AutoSklearnClassifier seed=%s", seed)
    # Stagger worker start-up so processes do not hit the shared folder at once.
    sleep(seed)
    try:
        lo.info("Starting seed=%s", seed)
        clf.fit(X_train,
                y_train,
                metric=autosklearn.metrics.f1,
                feat_type=feat_type,
                dataset_name=dataset_name)
    except Exception:
        lo.exception("Error in clf.fit - seed:%s", seed)
        raise
    lo.info("####### Finished seed=%s", seed)
    return None
def train_multicore(X, y, feat_type, memory_limit, atsklrn_tempdir,
                    pool_size=1, per_run_time_limit=60):
    """Fit several auto-sklearn classifiers in parallel worker processes.

    Spawns ``pool_size`` processes running ``spawn_autosklearn_classifier``
    against the shared temp folder, then blocks until all of them finish.
    """
    lo = utl.get_logger(inspect.stack()[0][3])
    time_left_for_this_task = calculate_time_left_for_this_task(
        pool_size, per_run_time_limit)
    per_model_minutes = math.ceil(per_run_time_limit / 60.0)
    overall_minutes = 2 * math.ceil(time_left_for_this_task / 60.0)
    lo.info("Max time allowance for a model " + str(per_model_minutes) +
            " minute(s)")
    lo.info("Overal run time is about " + str(overall_minutes) + " minute(s)")
    # Seed 1 is reserved for the ensemble builder, so workers get 2..pool_size+1.
    workers = []
    for seed in range(2, pool_size + 2):
        worker = multiprocessing.Process(
            target=spawn_autosklearn_classifier,
            args=(X, y, seed, 'foobar', time_left_for_this_task,
                  per_run_time_limit, feat_type, memory_limit, atsklrn_tempdir))
        worker.start()
        lo.info("Multicore process " + str(seed) + " started")
        workers.append(worker)
    # Wait for every worker before declaring the multicore fit done.
    for worker in workers:
        worker.join()
    lo.info("Multicore fit completed")
def _init_environment(self):
    """Prepare output folders, the class count, GPU visibility, and logging."""
    # Folders for storage/retrieval of checkpoints, graphs and solutions.
    self.main_directory = '../'
    self.checkpoint_dir = self.main_directory + 'checkpts/' + self.model_name + '/'
    self.tensorboard_dir = self.main_directory + 'tb_graphs/' + self.model_name + '/'
    self.solutions_dir = self.main_directory + 'solutions/' + self.model_name + '/'
    logging_directory = self.main_directory + 'logs/'
    required_dirs = (self.checkpoint_dir, self.tensorboard_dir,
                     self.solutions_dir, logging_directory)
    for dir_name in required_dirs:
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
    # 10 classes in the small setting, 345 otherwise.
    self.num_classes = 10 if self.FLAGS.small else 345
    if self.FLAGS.gpu != -1:
        # Pin the requested GPU; equivalent to `export CUDA_VISIBLE_DEVICES=<gpu>`.
        os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
        os.environ["CUDA_VISIBLE_DEVICES"] = str(self.FLAGS.gpu)
        echo_cmd = 'echo "GPU Device in use: \'$CUDA_VISIBLE_DEVICES\'"'
        if in_jupyter():
            get_ipython().system(echo_cmd)  # pylint: disable=E0602
        else:
            os.system(echo_cmd)
    # File logging is currently hard-enabled; the print() fallback is kept as a
    # manual toggle for debugging.
    log_in_file = True
    if log_in_file:
        self.logging = get_logger(self.model_name, logging_directory).info
    else:
        self.logging = print
def main():
    """Entry point: parse args, build the AdaMatting model, dispatch on --mode.

    Modes: ``train`` (optionally multi-GPU via DataParallel), ``test``,
    ``prep`` (data preparation only; no network is built).
    """
    args = get_args()
    logger = get_logger(args.write_log)

    # Restrict visible GPUs; after this, CUDA devices renumber from 0.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    # Fixed: replaced the manual range(len(...)) + append loop with the
    # idiomatic list(range(...)); one logical id per requested GPU.
    device_ids = list(range(len(args.gpu.split(','))))

    multi_gpu = False
    if args.mode != "prep":
        logger.info("Loading network")
        model = AdaMatting(in_channel=4)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=0)
        if args.cuda:
            device = torch.device("cuda:{}".format(device_ids[0]))
            if len(device_ids) > 1 and args.mode == "train":
                logger.info("Loading with multiple GPUs")
                model = torch.nn.DataParallel(model, device_ids=device_ids)
                multi_gpu = True
            model = model.cuda(device=device_ids[0])
        else:
            device = torch.device("cpu")

    if args.mode == "train":
        logger.info("Program runs in train mode")
        train(model=model,
              optimizer=optimizer,
              device=device,
              args=args,
              logger=logger,
              multi_gpu=multi_gpu)
    elif args.mode == "test":
        logger.info("Program runs in test mode")
        test()
    elif args.mode == "prep":
        logger.info("Program runs in prep mode")
        # composite_dataset(args.raw_data_path, logger)
        gen_train_valid_names(args.valid_portion, logger)
def __init__(self, bot_jid, stream):
    """Wire the message bus to its XMPP stream and command handlers."""
    self.bot_jid = bot_jid
    self._stream = stream
    self.logger = get_logger()
    # Handlers receive a back-reference so they can reply through this bus.
    self.cmd_handler = CommandHandler(message_bus=self)
    self.admin_cmd_handler = AdminCMDHandler(message_bus=self)
    # Marker used when packing multiple offline messages into one payload.
    self.offline_split_symbol = "$_$_$_$"
def __init__(self, bot_jid, stream):
    """Initialize the message bus with its XMPP stream and command handlers."""
    self.bot_jid = bot_jid  # JID the bot speaks as
    self._stream = stream   # underlying XMPP stream used to send stanzas
    # Handlers receive this bus so they can publish replies through it.
    self.cmd_handler = CommandHandler(message_bus=self)
    self.admin_cmd_handler = AdminCMDHandler(message_bus=self)
    self.logger = get_logger()
    # Delimiter used when packing multiple offline messages into one payload
    # (presumably split again on delivery — confirm against the consumer).
    self.offline_split_symbol = "$_$_$_$"
    return
def evaluate_runtime(self):
    """Benchmark per-frame inference time for every model output node at each
    DASH input resolution, printing per-node results and writing a
    tab-separated summary log.
    """
    log_name = datetime.now().strftime('result_runtime.log')
    summary_logger = util.get_logger(opt.result_dir, log_name)
    result = {}  # result[lr][node] -> average seconds per frame
    # Header row: one column per target resolution.
    log = ''
    log += 'OutputNode\t'
    for lr in opt.dash_lr:
        log += '{}p\t'.format(lr)
        result[lr] = {}
    summary_logger.info(log)
    batch_num = self.opt.test_num_batch
    for lr in opt.dash_lr:
        self.dataset.setTargetLR(lr)
        self.model.setTargetScale(self.dataset.getTargetScale())
        for node in self.model.getOutputNodes():
            elapsed_times = []
            t_w = RESOLUTION[lr][0]
            t_h = RESOLUTION[lr][1]
            # Random dummy batch shaped like frames at this resolution.
            input = torch.FloatTensor(batch_num, 3, t_w,
                                      t_h).random_(0, 1).to(self.device)
            try:
                # Warm-up passes (not timed) so CUDA kernels are initialized.
                for _ in range(DUMMY_TRIAL):
                    output = self.model(input, node)
                    torch.cuda.synchronize()
                # Timed passes; synchronize so GPU work lands inside the window.
                for _ in range(TEST_TRIAL):
                    start_time = time.perf_counter()
                    output = self.model(input, node)
                    torch.cuda.synchronize()
                    end_time = time.perf_counter()
                    elapsed_time = (end_time - start_time)
                    elapsed_times.append(elapsed_time)
            except Exception as e:
                # Any failure (e.g. CUDA OOM) aborts the whole benchmark run.
                print(e)
                sys.exit()
            # Average per single frame: total time over trials * batch size.
            average_elapsed_time = np.sum(elapsed_times) / (TEST_TRIAL * batch_num)
            result[lr][node] = average_elapsed_time
            print(
                '[Resolution: Size ({}x{}), OutputNode: {}] / Inference time per frame(sec) {} / Max-Min(sec) {}'
                .format(
                    t_w, t_h, node, round(average_elapsed_time, 4),
                    round(
                        np.max(elapsed_times) - np.min(elapsed_times), 4)))
    # One summary row per output node: average time at each resolution it serves.
    for node in self.output_nodes:
        log = ''
        log += '{}\t'.format(node)
        for lr in self.node2res[node]:
            log += '{}\t'.format(round(result[lr][node], 4))
        summary_logger.info(log)
def __init__(self, context_name, log_level, log_stream, log_folder,
             database_module, db_configuration, **kwargs):
    """Set up logging and the database backend for this context.

    Parameters
    ----------
    context_name : str
        Name used for the logger and in status messages.
    log_level, log_stream, log_folder
        Logging configuration forwarded to ``get_logger``.
    database_module : callable
        Factory called as ``database_module(logger, **db_configuration)``;
        must return an object exposing ``put``.
    db_configuration : dict
        Keyword arguments for the database factory.

    Raises
    ------
    ValueError
        If the database factory yields no usable instance.
    """
    self.logger = get_logger(context_name, log_level, log_stream, log_folder)
    print_start(self.logger)
    self.db = database_module(self.logger, **db_configuration)
    self.logger.info("{} setup complete !!".format(context_name))
    # Fixed: compare against None with identity (`is not None`), per PEP 8,
    # instead of the equality test `!= None`.
    if self.db is not None:
        self.push_todb = self.db.put  # shortcut to the write method
    else:
        raise ValueError("No database parameter given")
def __init__(self, log_level, log_stream, log_folder="logs"):
    """[Multi Processing class] Run the lambda functions passed in inside threads.

    Arguments:
        log_level {[string]} -- Level of log for each process
        log_stream {[string]} -- Stream of log for each process
        log_folder {[string]} -- Folder where per-process logs are written
    """
    # Processes spawned later are tracked here so they can be joined/cleaned up.
    self.process_list = []
    self.logger = get_logger(__name__, log_level, log_stream, log_folder)
def define_pool_size(memory_limit):
    """Return a worker-pool size bounded by both available RAM and CPU count.

    Some classifiers can use more than one core, so the pool is capped at half
    the cores and at however many workers fit in memory given ``memory_limit``
    (MB per worker).
    """
    total_memory = psutil.virtual_memory().total
    cpu_count = psutil.cpu_count()
    max_pool_size = int(math.ceil(total_memory / (memory_limit * 1000000)))
    half_of_cores = int(math.ceil(cpu_count / 2.0))
    lo = utl.get_logger(inspect.stack()[0][3])
    lo.info("Virtual Memory Size = " + str(total_memory))
    lo.info("CPU Count =" + str(cpu_count))
    lo.info("Max CPU Pool Size by Memory = " + str(max_pool_size))
    # The tighter of the two bounds wins.
    return min(max_pool_size, half_of_cores)
def x_y_dataframe_split(dataframe, parameter, id=False):
    """Split a dataframe into features X and integer target y.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Source data containing the id column, the target column, and features.
    parameter : dict
        Must provide 'id_field' and 'target_field' column names.
    id : bool, optional
        When True, also return the id column. (Shadows the builtin ``id``;
        kept for backward compatibility with existing callers.)

    Returns
    -------
    (X, y) or (X, y, row_id)
        X is a DataFrame without id/target columns, y an int numpy array.
    """
    lo = utl.get_logger(inspect.stack()[0][3])
    lo.info("Dataframe split into X and y")
    X = dataframe.drop([parameter["id_field"], parameter["target_field"]],
                       axis=1)
    # Fixed: pd.np was deprecated and removed in pandas 2.0; use the Series'
    # own to_numpy() conversion instead.
    y = dataframe[parameter["target_field"]].to_numpy(dtype='int')
    if id:
        row_id = dataframe[parameter["id_field"]]
        return X, y, row_id
    return X, y
def max_estimators_fit_duration(X, y, max_classifier_time_budget, logger,
                                sample_factor=1):
    """Estimate a per-run time limit by timing each default auto-sklearn
    classifier on a sample, capped at ``max_classifier_time_budget`` seconds.

    Each classifier is fitted in its own process; the slowest observed fit
    time (shared via a Manager value) scaled by ``sample_factor`` becomes the
    suggested per-run limit.
    """
    lo = utl.get_logger(inspect.stack()[0][3])
    lo.info("Constructing preprocessor pipeline and transforming sample data")
    # we don't care about the data here but need to preprocess, otherwise the
    # classifiers crash
    pipeline = SimpleClassificationPipeline(include={
        'imputation': ['most_frequent'],
        'rescaling': ['standardize']
    })
    default_cs = pipeline.get_hyperparameter_search_space(
    ).get_default_configuration()
    pipeline = pipeline.set_hyperparameters(default_cs)
    pipeline.fit(X, y)
    # Transformed features only; the second return value is unused.
    X_tr, dummy = pipeline.fit_transformer(X, y)
    lo.info("Running estimators on the sample")
    # going over all default classifiers used by auto-sklearn
    clfs = autosklearn.pipeline.components.classification._classifiers
    processes = []
    with multiprocessing.Manager() as manager:
        # Shared int updated by the children with the slowest fit time seen.
        max_clf_time = manager.Value('i', 3)  # default 3 sec
        for clf_name, clf_class in clfs.items():
            pr = multiprocessing.Process(target=time_single_estimator,
                                         name=clf_name,
                                         args=(clf_name, clf_class, X_tr, y,
                                               max_clf_time, logger))
            pr.start()
            processes.append(pr)
        for pr in processes:
            pr.join(max_classifier_time_budget
                    )  # will block for max_classifier_time_budget or
            # until the classifier fit process finishes. After
            # max_classifier_time_budget we will terminate all still running
            # processes here.
            if pr.is_alive():
                logger.info("Terminating " + pr.name + " process due to timeout")
                pr.terminate()
        # Read the shared value before the Manager shuts down.
        result_max_clf_time = max_clf_time.value
    lo.info("Test classifier fit completed")
    per_run_time_limit = int(sample_factor * result_max_clf_time)
    # Never exceed the caller-supplied budget.
    return max_classifier_time_budget if per_run_time_limit > max_classifier_time_budget else per_run_time_limit
def log_from_general() -> None:
    """Demonstrate one message emitted at every standard logging level."""
    # The logger name must match the customization configured elsewhere.
    demo_logger = utility.get_logger("LogDemo")
    suffix = "from LogDemo"
    demo_logger.debug(f"DEBUG - {suffix}")
    demo_logger.info(f"INFO - {suffix}")
    demo_logger.warning(f"WARNING - {suffix}", exc_info=True)
    demo_logger.exception(f"ERROR - {suffix}")
    # error(..., exc_info=True) behaves the same as exception()
    demo_logger.error(f"ERROR - {suffix}", exc_info=True)
    demo_logger.critical(f"CRITICAL - {suffix}", exc_info=True)
    return
def __init__(self):
    """Create the Clubot XMPP client and prepare its connection state."""
    my_jid = JID(USER+'/Bot')  # full JID with the 'Bot' resource
    self.my_jid = my_jid
    settings = XMPPSettings({
        "software_name": "Clubot",
        "software_version": __version__,
        "software_os": "Linux",
        "tls_verify_peer": False,  # NOTE(review): peer certificate not verified
        "starttls": True,
        "ipv6":False,
        "poll_interval": 10,
    })
    settings["password"] = PASSWORD
    version_provider = VersionProvider(settings)
    self.connected = False
    mainloop = TornadoMainLoop(settings)
    # This object itself acts as an event handler alongside the version provider.
    self.client = Client(my_jid, [self, version_provider], settings, mainloop)
    #self.client = Client(my_jid, [self, version_provider], settings)
    self.logger = get_logger()
    self.trytimes = 0  # reconnect attempts so far
    self.sended = []   # bookkeeping of messages already sent
    Logics.empty_status()  # clear persisted presence/status at startup
def __init__(self):
    """Create the Clubot XMPP client and prepare its connection state."""
    my_jid = JID(USER + '/Bot')  # full JID with the 'Bot' resource
    self.my_jid = my_jid
    settings = XMPPSettings({
        "software_name": "Clubot",
        "software_version": __version__,
        "software_os": "Linux",
        "tls_verify_peer": False,  # NOTE(review): peer certificate not verified
        "starttls": True,
        "ipv6": False,
        "poll_interval": 10,
    })
    settings["password"] = PASSWORD
    version_provider = VersionProvider(settings)
    self.connected = False
    mainloop = TornadoMainLoop(settings)
    # This object itself acts as an event handler alongside the version provider.
    self.client = Client(my_jid, [self, version_provider], settings, mainloop)
    #self.client = Client(my_jid, [self, version_provider], settings)
    self.logger = get_logger()
    self.trytimes = 0  # reconnect attempts so far
    self.sended = []   # bookkeeping of messages already sent
    Logics.empty_status()  # clear persisted presence/status at startup
# Training # ############ if save_checkpts or restore: saver = tf.train.Saver(max_to_keep=10) checkpoint_dir = checkpoints_directory + model_name + '/' tensorboard_dir = tensorboard_directory + model_name + '/' if save_checkpts and not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) if log_in_tb and not os.path.exists(tensorboard_dir): os.makedirs(tensorboard_dir) if log_in_file: if not os.path.exists(logging_directory): os.makedirs(logging_directory) logger = get_logger(model_name, logging_directory) logging = logger.info else: logging = print logging("Current model: \n\t{}".format(model_name)) config = tf.ConfigProto() config.gpu_options.allow_growth=True with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) sess.run(tf.tables_initializer()) # Recover previous work ckpt = tf.train.get_checkpoint_state(os.path.dirname(checkpoint_dir + 'checkpoint')) if restore and ckpt and ckpt.model_checkpoint_path:
This module defines the Article abstract base class alongside with many specific classes used to store articles scraped from various newspapers of interest. Supported newspaper in alphabetical order: La Repubblica (LaRepubblicaArticle) """ import abc from parser import get_page_html from bs4 import BeautifulSoup from utility import get_logger LOGGER = get_logger('article') class ArticleException(Exception): """Custom exception for the Article class. Attributes ---------- msg: str Human readable string describing the exception. """ def __init__(self, msg): """Initializer for the ExtractorException class. Parameters
output_input, video_info)) while (1): input = output_output.recv() if input[0] == 'output': end_time = time.time() elapsed_time = end_time - start_time fps = segment_fps * segment_size / (end_time - start_time) print( 'overall [elapsed], resolution [{}p] : {} second, {} fps' .format(resolution, elapsed_time, fps)) elapsed_time_list[resolution].append(elapsed_time) fps_list[resolution].append(fps) break else: print('request: Invalid input') break #print statistics runtimeLogger = util.get_logger(opt.result_dir, 'result_video_runtime.log') for resolution in resolution_list: print('[{}p]: minmum {} fps, average {} fps, maximum {} fps'.format( resolution, np.min(fps_list[resolution]), np.average(fps_list[resolution]), np.max(fps_list[resolution]))) log_str = "\t".join(map(str, fps_list[resolution])) runtimeLogger.info(log_str) #terminate processes sr_process.terminate() decode_process.terminate() encode_process.terminate()
def sync_search(device, dir='experiment'):
    """Jointly explore the architecture + quantization space with an RL agent.

    Each episode samples a child network and quantization, validates it
    against FPGA resource constraints, trains it, and feeds the validation
    accuracy back to the controller as the reward. Results go to a log file,
    a CSV, and TensorBoard under a directory derived from the constraints.
    """
    dir = os.path.join(
        dir,
        utility.cleanText(f"rLut-{args.rLUT}_rThroughput-{args.rThroughput}"))
    if os.path.exists(dir) is False:
        os.makedirs(dir)
    filepath = os.path.join(
        dir, utility.cleanText(f"joint_{args.episodes}-episodes"))
    logger = utility.get_logger(filepath)
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    tb_writer = SummaryWriter(filepath)
    # Dump the full run configuration up front for reproducibility.
    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'joint'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"seed: \t\t\t\t {args.seed}")
    logger.info(f"gpu: \t\t\t\t {args.gpu}")
    logger.info(f"include batchnorm: \t\t\t {args.batchnorm}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    # NOTE(review): the line above is logged twice in the original; kept as-is.
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    for name, value in ARCH_SPACE.items():
        logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")
    # Controller samples from the merged architecture + quantization space.
    agent = Agent({
        **ARCH_SPACE,
        **QUAN_SPACE
    },
                  args.layers,
                  lr=args.learning_rate,
                  device=torch.device('cpu'),
                  skip=args.skip)
    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)
    input_shape, num_classes = data.get_info(args.dataset)
    ## (3,32,32) -> (1,3,32,32) add batch dimension
    sample_input = utility.get_sample_input(device, input_shape)
    # CSV header: per-layer parameters plus accuracy/FPGA/timing columns.
    writer.writerow(["ID"] +
                    ["Layer {}".format(i) for i in range(args.layers)] +
                    ["Accuracy"] + [
                        "Partition (Tn, Tm)", "Partition (#LUTs)",
                        "Partition (#cycles)", "Total LUT", "Total Throughput"
                    ] + ["Time"])
    arch_id, total_time = 0, 0
    best_reward = float('-inf')
    logger.info('=' * 50 +
                "Start exploring architecture & quantization space" +
                '=' * 50)
    best_samples = BestSamples(5)
    for e in range(args.episodes):
        logger.info('-' * 130)
        arch_id += 1
        start = time.time()
        # Sample one joint architecture+quantization rollout from the controller.
        rollout, paras = agent.rollout()
        logger.info("Sample Architecture ID: {}, Sampled actions: {}".format(
            arch_id, rollout))
        arch_paras, quan_paras = utility.split_paras(paras)
        fpga_model = FPGAModel(rLUT=args.rLUT,
                               rThroughput=args.rThroughput,
                               arch_paras=arch_paras,
                               quan_paras=quan_paras)
        if fpga_model.validate():
            # Fits the FPGA budget: build and train the child network.
            model, optimizer = child.get_model(input_shape,
                                               arch_paras,
                                               num_classes,
                                               device,
                                               multi_gpu=args.multi_gpu,
                                               do_bn=args.batchnorm)
            if args.verbosity > 1:
                print(model)
                torchsummary.summary(model, input_shape)
            if args.adapt:
                num_w = utility.get_net_param(model)
                macs = utility.get_net_macs(model, sample_input)
                tb_writer.add_scalar('num_param', num_w, arch_id)
                tb_writer.add_scalar('macs', macs, arch_id)
                if args.verbosity > 1:
                    print(f"# of param: {num_w}, macs: {macs}")
            _, val_acc = backend.fit(model,
                                     optimizer,
                                     train_data,
                                     val_data,
                                     quan_paras=quan_paras,
                                     epochs=args.epochs,
                                     verbosity=args.verbosity)
        else:
            # Invalid under the resource constraints: zero accuracy/reward.
            val_acc = 0
        if args.adapt:
            ## TODO: how to make arch_reward function with macs and latency?
            arch_reward = val_acc
        else:
            arch_reward = val_acc
        agent.store_rollout(rollout, arch_reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_samples.register(arch_id, rollout, arch_reward)
        tb_writer.add_scalar('val_acc', val_acc, arch_id)
        tb_writer.add_scalar('arch_reward', arch_reward, arch_id)
        if arch_reward > best_reward:
            best_reward = arch_reward
            tb_writer.add_scalar('best_reward', best_reward, arch_id)
            tb_writer.add_graph(model.eval(), (sample_input, ))
        writer.writerow([arch_id] +
                        [str(paras[i]) for i in range(args.layers)] +
                        [arch_reward] + list(fpga_model.get_info()) +
                        [ep_time])
        logger.info(f"Reward: {arch_reward}, " +
                    f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e+1)}")
        logger.info(f"Best Reward: {best_samples.reward_list[0]}, " +
                    f"ID: {best_samples.id_list[0]}, " +
                    f"Rollout: {best_samples.rollout_list[0]}")
    logger.info('=' * 50 +
                "Architecture & quantization sapce exploration finished" +
                '=' * 50)
    logger.info(f"Total elasped time: {total_time}")
    logger.info(f"Best samples: {best_samples}")
    tb_writer.close()
    csvfile.close()
def evaluate_quality(self):
    """Compare SR output against bicubic baselines (PSNR/SSIM) for every
    output node, writing one tab-separated summary log plus one per-node
    detail log with per-frame values.
    """
    #summary log
    log_name = datetime.now().strftime(
        'result_quality_summary_{}.log'.format(opt.test_num_epoch))
    summary_logger = util.get_logger(opt.result_dir, log_name)
    # Summary header: paired SR/bicubic PSNR and SSIM columns per resolution.
    log = ''
    log += 'outputIdx\t'
    for lr in opt.dash_lr:
        log += 'PSNR(SR, {}p)\t'.format(lr)
        log += 'PSNR(bicubic, {}p)\t'.format(lr)
        log += '\t'
        log += 'SSIM(SR, {}p)\t'.format(lr)
        log += 'SSIM(bicubic, {}p)\t'.format(lr)
        log += '\t'
    summary_logger.info(log)
    #detail log (per frame)
    detail_logger = {}
    for output_node in self.output_nodes:
        detaill_logname = datetime.now().strftime(
            'result_quality_detail_{}_{}.log'.format(
                output_node, opt.test_num_epoch))
        detail_logger[output_node] = util.get_logger(
            opt.result_dir, detaill_logname)
    # Detail header rows: only the resolutions this node actually serves.
    for output_node in self.output_nodes:
        log = ''
        log += 'FrameIdx\t'
        for lr in self.node2res[output_node]:
            log += 'PSNR(SR, {}p)\t'.format(lr)
            log += 'PSNR(bicubic, {}p)\t'.format(lr)
            log += '\t'
            log += 'SSIM(SR, {}p)\t'.format(lr)
            log += 'SSIM(bicubic, {}p)\t'.format(lr)
            log += '\t'
        detail_logger[output_node].info(log)
    #analyze
    baseline_result = self._analyze_baseline()
    sr_result = {}
    for output_node in self.output_nodes:
        sr_result[output_node] = self._analyze_sr(output_node)
    #logging
    for output_node in self.output_nodes:
        #analyze
        # Summary row: mean PSNR/SSIM over all frames for this node.
        log = ''
        log += '{}\t'.format(output_node)
        for lr in opt.dash_lr:
            if lr in self.node2res[output_node]:
                log += '{}\t'.format(
                    np.mean(sr_result[output_node][lr].psnr))
                log += '{}\t'.format(np.mean(baseline_result[lr].psnr))
                log += '\t'
                log += '{}\t'.format(
                    np.mean(sr_result[output_node][lr].ssim))
                log += '{}\t'.format(np.mean(baseline_result[lr].ssim))
                log += '\t'
            else:
                # Resolution not served by this node: emit empty columns so
                # the table stays aligned.
                log += '\t'
                log += '\t'
                log += '\t'
                log += '\t'
                log += '\t'
                log += '\t'
        summary_logger.info(log)
        # Detail rows: one line per frame with raw PSNR/SSIM values.
        for idx in range(len(self.dataset)):
            log = ''
            log += '{}\t'.format(idx)
            for lr in opt.dash_lr:
                if lr in self.node2res[output_node]:
                    log += '{}\t'.format(
                        sr_result[output_node][lr].psnr[idx])
                    log += '{}\t'.format(baseline_result[lr].psnr[idx])
                    log += '\t'
                    log += '{}\t'.format(
                        sr_result[output_node][lr].ssim[idx])
                    log += '{}\t'.format(baseline_result[lr].ssim[idx])
                    log += '\t'
                else:
                    log += '\t'
                    log += '\t'
                    log += '\t'
                    log += '\t'
                    log += '\t'
                    log += '\t'
            detail_logger[output_node].info(log)
def __init__(self, message_bus): self._message_bus = message_bus # 消息总线 self._logger = get_logger() # 日志 self._http_stream = TornadoHTTPClient() self._honor = Honor()
def __init__(self, message_bus): self._message_bus = message_bus # 消息总线 self._logger = get_logger() # 日志 self._http_stream = HTTPStream.instance()
if __name__ == "__main__": with open(CONFIG) as file: configuration = json.load(file) Database_module, db_configuration = parse_dbconfig(configuration) ## reading logging configuration logging_configuration = configuration["logging"] log_folder = logging_configuration["output"] if not log_folder in os.listdir('.'): os.mkdir(log_folder) logger = get_logger(__name__, log_level, log_stream, log_folder) ## logger for main thread ## logger test in main thread print_start(logger) logger.info("Application started , Extracting all the plugins") ## handles creating mutiple process ## from single process using MultiProcessing import_list = configuration["plugins"] with MultiProcessingContext(log_level, log_stream, log_folder) as execute: for attr in import_list: path = attr["filename"]
La Repubblica (LaRepubblicaExtractor) """ import re import abc import csv from parser import get_page_html from datetime import date, timedelta from tqdm import tqdm from bs4 import BeautifulSoup from article import ArticleException, LaRepubblicaArticle from utility import datespan, get_logger LOGGER = get_logger('extractor') class ExtractorException(Exception): """Custom exception for the Extractor class. Attributes ---------- msg: str Human readable string describing the exception. """ def __init__(self, msg): """Initializer for the ExtractorException class. Parameters
def quantization_search(device, dir='experiment'):
    """Explore only the quantization space for a fixed child architecture.

    A hand-picked architecture (B2 below) is trained once; then each episode
    samples a quantization scheme, validates it against FPGA constraints, and
    evaluates it for a single epoch to produce the controller's reward.
    """
    dir = os.path.join(
        dir,
        utility.cleanText(f"rLut-{args.rLUT}_rThroughput-{args.rThroughput}"))
    if os.path.exists(dir) is False:
        os.makedirs(dir)
    filepath = os.path.join(
        dir, utility.cleanText(f"quantization_{args.episodes}-episodes"))
    logger = utility.get_logger(filepath)
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    # Dump the full run configuration up front for reproducibility.
    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'quantization'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"seed: \t\t\t\t {args.seed}")
    logger.info(f"gpu: \t\t\t\t {args.gpu}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"include batchnorm: \t\t\t {args.batchnorm}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    # for name, value in ARCH_SPACE.items():
    #     logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")
    # Controller samples from the quantization space only (no skip connections).
    agent = Agent(QUAN_SPACE,
                  args.layers,
                  lr=args.learning_rate,
                  device=torch.device('cpu'),
                  skip=False)
    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)
    input_shape, num_classes = data.get_info(args.dataset)
    # CSV header: per-layer quantization plus accuracy/FPGA/timing columns.
    writer.writerow(["ID"] +
                    ["Layer {}".format(i) for i in range(args.layers)] +
                    ["Accuracy"] + [
                        "Partition (Tn, Tm)", "Partition (#LUTs)",
                        "Partition (#cycles)", "Total LUT", "Total Throughput"
                    ] + ["Time"])
    child_id, total_time = 0, 0
    logger.info('=' * 50 + "Start exploring quantization space" + '=' * 50)
    best_samples = BestSamples(5)
    # Fixed candidate architectures (presumably found by earlier architecture
    # searches — confirm provenance); only B2 is used below.
    A1 = [{
        'filter_height': 3, 'filter_width': 3, 'stride_height': 1,
        'stride_width': 1, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 7, 'filter_width': 5, 'stride_height': 1,
        'stride_width': 1, 'num_filters': 48, 'pool_size': 1
    }, {
        'filter_height': 5, 'filter_width': 5, 'stride_height': 2,
        'stride_width': 1, 'num_filters': 48, 'pool_size': 1
    }, {
        'filter_height': 3, 'filter_width': 5, 'stride_height': 1,
        'stride_width': 1, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 5, 'filter_width': 7, 'stride_height': 1,
        'stride_width': 1, 'num_filters': 36, 'pool_size': 1
    }, {
        'filter_height': 3, 'filter_width': 1, 'stride_height': 1,
        'stride_width': 2, 'num_filters': 64, 'pool_size': 2
    }]
    A2 = [{
        'filter_height': 3, 'filter_width': 3, 'stride_height': 1,
        'stride_width': 1, 'num_filters': 24, 'pool_size': 1
    }, {
        'filter_height': 5, 'filter_width': 5, 'stride_height': 1,
        'stride_width': 1, 'num_filters': 36, 'pool_size': 1
    }, {
        'filter_height': 5, 'filter_width': 5, 'stride_height': 2,
        'stride_width': 1, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 5, 'filter_width': 5, 'stride_height': 1,
        'stride_width': 1, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 5, 'filter_width': 5, 'stride_height': 1,
        'stride_width': 2, 'num_filters': 24, 'pool_size': 1
    }, {
        'filter_height': 3, 'filter_width': 3, 'stride_height': 1,
        'stride_width': 2, 'num_filters': 64, 'pool_size': 1
    }]
    B1 = [{
        'filter_height': 3, 'filter_width': 3, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 3, 'filter_width': 5, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 3, 'filter_width': 3, 'num_filters': 64, 'pool_size': 2
    }, {
        'filter_height': 5, 'filter_width': 5, 'num_filters': 64, 'pool_size': 2
    }, {
        'filter_height': 5, 'filter_width': 3, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 7, 'filter_width': 7, 'num_filters': 64, 'pool_size': 1
    }]
    B2 = [{
        'filter_height': 5, 'filter_width': 3, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 3, 'filter_width': 5, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 3, 'filter_width': 5, 'num_filters': 64, 'pool_size': 2
    }, {
        'filter_height': 5, 'filter_width': 5, 'num_filters': 64, 'pool_size': 2
    }, {
        'filter_height': 5, 'filter_width': 3, 'num_filters': 64, 'pool_size': 1
    }, {
        'filter_height': 7, 'filter_width': 7, 'num_filters': 64, 'pool_size': 1
    }]
    arch_paras = B2
    # Train the fixed architecture once; quantization episodes reuse this model.
    model, optimizer = child.get_model(input_shape,
                                       arch_paras,
                                       num_classes,
                                       device,
                                       multi_gpu=args.multi_gpu,
                                       do_bn=False)
    _, val_acc = backend.fit(model,
                             optimizer,
                             train_data=train_data,
                             val_data=val_data,
                             epochs=args.epochs,
                             verbosity=args.verbosity)
    print(val_acc)
    for e in range(args.episodes):
        logger.info('-' * 130)
        child_id += 1
        start = time.time()
        # Sample one quantization rollout from the controller.
        quan_rollout, quan_paras = agent.rollout()
        logger.info("Sample Quantization ID: {}, Sampled actions: {}".format(
            child_id, quan_rollout))
        fpga_model = FPGAModel(rLUT=args.rLUT,
                               rThroughput=args.rThroughput,
                               arch_paras=arch_paras,
                               quan_paras=quan_paras)
        if fpga_model.validate():
            # Evaluate the trained model under this quantization for 1 epoch.
            _, reward = backend.fit(model,
                                    optimizer,
                                    val_data=val_data,
                                    quan_paras=quan_paras,
                                    epochs=1,
                                    verbosity=args.verbosity)
        else:
            # Violates the FPGA resource budget: zero reward.
            reward = 0
        agent.store_rollout(quan_rollout, reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_samples.register(child_id, quan_rollout, reward)
        writer.writerow([child_id] +
                        [str(quan_paras[i]) for i in range(args.layers)] +
                        [reward] + list(fpga_model.get_info()) + [ep_time])
        logger.info(f"Reward: {reward}, " + f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e+1)}")
        logger.info(f"Best Reward: {best_samples.reward_list[0]}, " +
                    f"ID: {best_samples.id_list[0]}, " +
                    f"Rollout: {best_samples.rollout_list[0]}")
    logger.info('=' * 50 + "Quantization sapce exploration finished" +
                '=' * 50)
    logger.info(f"Total elasped time: {total_time}")
    logger.info(f"Best samples: {best_samples}")
    csvfile.close()
def run():
    """Train and/or evaluate an answer-selection model per CLI arguments.

    Covers the full experiment lifecycle: RNG seeding, output-directory
    setup (fresh timestamped dir or reuse of a resumed snapshot's dir),
    dataset/iterator construction, model/optimizer/loss creation, optional
    checkpoint resume, TensorBoard logging, an epoch loop with loss-delta
    early stopping, and a top-k evaluation-only mode (``--skip-training``).
    """
    args = parse_args()
    # Seed every RNG source so experiment results are reproducible.
    start_epoch = 1
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.device != -1:
        torch.cuda.manual_seed(args.seed)
    device = torch.device(f'cuda:{args.device}' if torch.cuda.is_available()
                          and args.device >= 0 else 'cpu')
    if torch.cuda.is_available() and args.device >= 0:
        # cudnn.benchmark only pays off when input dimensions stay fixed;
        # otherwise cudnn re-tunes on every shape change and gets slower.
        # The RNN paths fit to variable lengths, so only CNN archs enable it.
        if args.arch in ['stack', 'multi', 'stack_multi']:
            torch.backends.cudnn.benchmark = True
    # Output directory: reuse the snapshot's directory tree when resuming,
    # otherwise create a fresh timestamped one.
    if args.resume_snapshot:
        # Verify the checkpoint file actually exists before using it.
        assert os.path.exists(
            args.resume_snapshot), f'{args.resume_snapshot} don"t exist!'
        model_dir, model_file = os.path.split(args.resume_snapshot)
        output_dir, _ = os.path.split(model_dir)
    else:
        base_dir = time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime())
        output_dir = os.path.join(args.out_dir, base_dir)
        model_dir = os.path.join(output_dir, 'save_model')
        os.makedirs(output_dir)  # create the output root directory
        os.makedirs(model_dir)
    # Log the full run configuration.
    logger = get_logger(output_dir)
    logger.info(pprint.pformat(vars(args)))
    logger.info(f'output dir is {output_dir}')
    # Load datasets, vocabulary and (optional) pretrained word vectors.
    train_dataset, dev_dataset, test_dataset, vocab, vectors = get_dataset(
        args, logger)
    # Fall back to 300-dim embeddings when no pretrained vectors are given.
    vectors_dim = 300 if vectors is None else vectors.size(1)
    # Build bucketing iterators; only the training iterator shuffles.
    train_loader = torchtext.data.BucketIterator(train_dataset,
                                                 args.batch_size,
                                                 device=device,
                                                 train=True,
                                                 shuffle=True,
                                                 sort=False,
                                                 repeat=False)
    dev_loader = torchtext.data.BucketIterator(dev_dataset,
                                               args.batch_size,
                                               device=device,
                                               train=False,
                                               shuffle=False,
                                               sort=False,
                                               repeat=False)
    test_loader = torchtext.data.BucketIterator(test_dataset,
                                                args.batch_size,
                                                device=device,
                                                train=False,
                                                shuffle=False,
                                                sort=False,
                                                repeat=False)
    # Build the model, optimizer and loss function for the chosen arch.
    if args.arch == 'stack':
        model = StackCNN(vocab_size=len(vocab),
                         embed_dim=vectors_dim,
                         embed_weight=vectors,
                         kernel_sizes=args.stack_kernel_sizes,
                         out_channels=args.stack_out_channels).to(device)
    elif args.arch == 'multi':
        model = MultiCNN(vocab_size=len(vocab),
                         embed_dim=vectors_dim,
                         embed_weight=vectors,
                         kernel_sizes=args.multi_kernel_sizes,
                         out_channels=args.multi_out_channels).to(device)
    elif args.arch == 'stack_multi':
        model = StackMultiCNN(
            vocab_size=len(vocab),
            embed_dim=vectors_dim,
            embed_weight=vectors,
            stack_kernel_sizes=args.stack_kernel_sizes,
            stack_out_channels=args.stack_out_channels,
            multi_kernel_sizes=args.multi_kernel_sizes,
            multi_out_channels=args.multi_out_channels).to(device)
    elif args.arch == 'bigru':
        assert args.hidden_size.find(
            ',') == -1, '--hidden-size must be a int for BiLSTM/BiGRU model'
        hidden_size = int(args.hidden_size)
        model = BiGRU(vocab_size=len(vocab),
                      embedding_dim=vectors_dim,
                      hidden_size=hidden_size,
                      dropout_r=args.dropout,
                      embed_weight=vectors).to(device)
    elif args.arch == 'bigru_cnn':
        assert args.hidden_size.find(
            ',') == -1, '--hidden-size must be a int for BiLSTM/BiGRU model'
        hidden_size = int(args.hidden_size)
        model = BiGRUCNN(vocab_size=len(vocab),
                         embedding_dim=vectors_dim,
                         hidden_size=hidden_size,
                         cnn_channel=args.cnn_channel,
                         dropout_r=args.dropout,
                         embed_weight=vectors).to(device)
    # Disabled architecture variants, kept for reference:
    # elif args.arch == 'norm_stack_multi':
    #     model = NormStackMultiCNN(vocab_size=len(vocab), embed_dim=vectors_dim, sent_length=args.fix_length,
    #                               embed_weight=vectors).to(device)
    # elif args.arch == 'stack_multi_atten':
    #     model = QA_StackMultiAttentionCNN(vocab_size=len(vocab), embed_dim=vectors_dim, embed_weight=vectors).to(
    #         device)
    # elif args.arch == 'ap_stack_multi':
    #     model = QA_AP_StackMultiCNN(vocab_size=len(vocab), embed_dim=vectors_dim, embed_weight=vectors).to(
    #         device)
    # elif args.arch == 'bilstm':
    #     assert args.hidden_size.find(',') == -1, '--hidden-size must be a int for LSTM model'
    #     hidden_size = int(args.hidden_size)
    #     model = BiLSTM(vocab_size=len(vocab), embedding_dim=vectors_dim, hidden_size=hidden_size,
    #                    dropout_r=args.dropout, embed_weight=vectors).to(device)
    # elif args.arch == 'stack_bilstm':
    #     hidden_size = [int(i) for i in args.hidden_size.split(',')]
    #     model = StackBiLSTM(vocab_size=len(vocab), embedding_dim=vectors_dim, hidden_size=hidden_size,
    #                         mlp_d=args.mlp_d, dropout_r=args.dropout, embed_weight=vectors).to(device)
    # elif args.arch == 'bigru':
    #     assert args.hidden_size.find(',') == -1, '--hidden-size must be a int for BiLSTM/BiGRU model'
    #     hidden_size = int(args.hidden_size)
    #     model = BiGRU(vocab_size=len(vocab), embedding_dim=vectors_dim, hidden_size=hidden_size,
    #                   dropout_r=args.dropout, embed_weight=vectors).to(device)
    # elif args.arch == 'stack_bigru':
    #     hidden_size = [int(i) for i in args.hidden_size.split(',')]
    #     model = StackBiGRU(vocab_size=len(vocab), embedding_dim=vectors_dim, hidden_size=hidden_size,
    #                        mlp_d=args.mlp_d,
    #                        sent_max_length=args.fix_length, dropout_r=args.dropout, embed_weight=vectors).to(device)
    else:
        raise ValueError("--arch is unknown")
    # Pick the optimizer requested on the command line.
    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optimizer == 'adagrad':
        optimizer = torch.optim.Adagrad(model.parameters(), lr=args.lr)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    else:
        raise ValueError("--optimizer is unknown")
    loss_fn = torch.nn.MarginRankingLoss(margin=args.margin)
    architecture = model.__class__.__name__
    # Restore model/optimizer state when resuming from a saved snapshot.
    if args.resume_snapshot:
        state = torch.load(args.resume_snapshot)
        model.load_state_dict(state['model'])
        optimizer.load_state_dict(state['optimizer'])
        epoch = state['epoch']
        start_epoch = state['epoch'] + 1
        if 'best_dev_score' in state:
            # Compatibility with checkpoints saved by older versions.
            dev_acc = state['best_dev_score']
            test_acc = 0
        else:
            dev_acc = state['dev_accuracy']
            test_acc = state['test_accuracy']
        logger.info(
            f"load state {args.resume_snapshot}, dev accuracy {dev_acc}, test accuracy {test_acc}"
        )
    # Persist the run arguments next to the outputs.
    with open(f'{output_dir}/arguments.csv', 'a') as f:
        for k, v in vars(args).items():
            f.write(f'{k},{v}\n')
    # Write metrics to TensorBoard.
    writer = SummaryWriter(output_dir)
    # Best-effort attempt to record the model's computation graph; a failure
    # here must not abort the run.
    try:
        q = torch.randint_like(torch.Tensor(1, args.fix_length),
                               2,
                               100,
                               dtype=torch.long)
        ql = torch.Tensor([args.fix_length]).type(torch.int)
        writer.add_graph(model, ((q, ql), (q, ql)))
    except Exception as e:
        logger.error("Failed to save model graph: {}".format(e))
        # exit()
    # Start training; track the best results seen so far.
    best_dev_score = -1  # best dev result observed
    best_test_score = -1  # best test result observed
    prev_loss = 0
    # Automatic learning-rate adjustment.
    # TODO: disabled for now — Adam already adapts the learning rate.
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=args.lr_reduce_factor,
    #                                                        patience=args.patience, verbose=True)
    if not args.skip_training:
        for epoch in range(start_epoch, start_epoch + args.epochs):
            start_time = time.time()
            # train epoch
            loss = train_epoch(epoch, train_loader, model, optimizer, loss_fn,
                               device)
            writer.add_scalar('train/loss', loss, epoch)
            logger.info(f'Train Epoch {epoch}: loss={loss}')
            # evaluate on the dev split
            dev_accuracy = evaluate(dev_loader, model, 1)
            logger.info(
                f'Evaluation metrices: dev accuracy = {100. * dev_accuracy}%')
            writer.add_scalar('dev/lr', optimizer.param_groups[0]['lr'],
                              epoch)
            writer.add_scalar('dev/acc', dev_accuracy, epoch)
            # Evaluate on the test split as well.
            test_accuracy = evaluate(test_loader, model, 1)
            logger.info(
                f'Evaluation metrices: test accuracy = {100. * test_accuracy}%'
            )
            writer.add_scalar('test/acc', test_accuracy, epoch)
            # Save a checkpoint for this epoch.
            save_state = {
                'epoch': epoch,
                'dev_accuracy': dev_accuracy,
                'test_accuracy': test_accuracy,
                'architecture': architecture,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(save_state,
                       f'{model_dir}/{architecture}_epoch_{epoch}.pth')
            logger.info(
                'Save model: epoch {}, dev accuracy {}, test accuracy {}'.
                format(epoch, dev_accuracy, test_accuracy))
            # Report the epoch's wall-clock time.
            duration = time.time() - start_time
            logger.info('Epoch {} finished in {:.2f} minutes'.format(
                epoch, duration / 60))
            # Early stopping when the loss barely changed between epochs.
            if abs(prev_loss - loss) <= args.early_stopping:
                logger.info(
                    'Early stopping. Loss changed by less than {}.'.format(
                        args.early_stopping))
                break
            prev_loss = loss
    else:
        # Evaluation-only mode: report top-k accuracy on dev and test.
        # NOTE(review): assumes args.topk is iterable and that evaluate()
        # returns a k-indexed mapping in this mode — confirm against evaluate().
        dev_accuracies = evaluate(dev_loader, model, args.topk)
        for k in args.topk:
            logger.info(
                f'Evaluation metrices: top-{k} dev accuracy = {dev_accuracies[k]}%'
            )
        test_accuracies = evaluate(test_loader, model, args.topk)
        for k in args.topk:
            logger.info(
                f'Evaluation metrices: top-{k} test accuracy = {test_accuracies[k]}%'
            )
def nested_search(device, dir='experiment'):
    """Nested architecture + quantization search.

    For each of ``args.episodes1`` sampled architectures, trains a child
    network, then runs an inner RL search of ``args.episodes2`` episodes over
    the quantization space; the best inner (quantization) reward becomes the
    outer (architecture) reward. Progress is logged and appended to a CSV
    file under ``dir``; nothing is returned.

    NOTE(review): the ``dir`` parameter shadows the builtin ``dir``; kept
    as-is for interface compatibility.
    """
    # Derive a per-constraint results directory and file stem.
    dir = os.path.join(
        dir,
        utility.cleanText(f"rLut-{args.rLUT}_rThroughput-{args.rThroughput}"))
    if os.path.exists(dir) is False:
        os.makedirs(dir)
    filepath = os.path.join(
        dir, utility.cleanText(f"nested_{args.episodes}-episodes"))
    logger = utility.get_logger(filepath)
    # NOTE(review): csvfile is closed at the end but leaks if an exception
    # escapes the loops — a `with` block would be safer (left unchanged here).
    csvfile = open(filepath + '.csv', mode='w+', newline='')
    writer = csv.writer(csvfile)
    # Dump the full run configuration up front for traceability.
    logger.info(f"INFORMATION")
    logger.info(f"mode: \t\t\t\t\t {'nested'}")
    logger.info(f"dataset: \t\t\t\t {args.dataset}")
    logger.info(f"seed: \t\t\t\t {args.seed}")
    logger.info(f"gpu: \t\t\t\t {args.gpu}")
    logger.info(f"number of child network layers: \t {args.layers}")
    logger.info(f"include batchnorm: \t\t\t {args.batchnorm}")
    logger.info(f"include stride: \t\t\t {not args.no_stride}")
    logger.info(f"include pooling: \t\t\t {not args.no_pooling}")
    logger.info(f"skip connection: \t\t\t {args.skip}")
    logger.info(f"required # LUTs: \t\t\t {args.rLUT}")
    logger.info(f"required throughput: \t\t\t {args.rThroughput}")
    logger.info(f"Assumed frequency: \t\t\t {CLOCK_FREQUENCY}")
    logger.info(f"training epochs: \t\t\t {args.epochs}")
    logger.info(f"data augmentation: \t\t\t {args.augment}")
    logger.info(f"batch size: \t\t\t\t {args.batch_size}")
    logger.info(f"controller learning rate: \t\t {args.learning_rate}")
    logger.info(f"architecture episodes: \t\t\t {args.episodes1}")
    logger.info(f"quantization episodes: \t\t\t {args.episodes2}")
    logger.info(f"using multi gpus: \t\t\t {args.multi_gpu}")
    logger.info(f"architecture space: ")
    for name, value in ARCH_SPACE.items():
        logger.info(name + f": \t\t\t\t {value}")
    logger.info(f"quantization space: ")
    for name, value in QUAN_SPACE.items():
        logger.info(name + f": \t\t\t {value}")
    # Load the dataset and its input shape / class count.
    train_data, val_data = data.get_data(args.dataset,
                                         device,
                                         shuffle=True,
                                         batch_size=args.batch_size,
                                         augment=args.augment)
    input_shape, num_classes = data.get_info(args.dataset)
    # CSV header: one column per layer plus accuracy / FPGA info / timing.
    writer.writerow(["ID"] +
                    ["Layer {}".format(i) for i in range(args.layers)] +
                    ["Accuracy"] + [
                        "Partition (Tn, Tm)", "Partition (#LUTs)",
                        "Partition (#cycles)", "Total LUT", "Total Throughput"
                    ] + ["Time"])
    # Outer RL agent samples architectures; it runs on CPU.
    arch_agent = Agent(ARCH_SPACE,
                       args.layers,
                       lr=args.learning_rate,
                       device=torch.device('cpu'),
                       skip=args.skip)
    arch_id, total_time = 0, 0
    logger.info('=' * 50 + "Start exploring architecture space" + '=' * 50)
    best_arch = BestSamples(5)
    for e1 in range(args.episodes1):
        logger.info('-' * 130)
        arch_id += 1
        start = time.time()
        # Sample an architecture and train a child network for it.
        arch_rollout, arch_paras = arch_agent.rollout()
        logger.info("Sample Architecture ID: {}, Sampled arch: {}".format(
            arch_id, arch_rollout))
        model, optimizer = child.get_model(input_shape,
                                           arch_paras,
                                           num_classes,
                                           device,
                                           multi_gpu=args.multi_gpu,
                                           do_bn=args.batchnorm)
        backend.fit(model,
                    optimizer,
                    train_data,
                    val_data,
                    epochs=args.epochs,
                    verbosity=args.verbosity)
        # Inner loop: a fresh RL agent searches the quantization space for
        # this trained architecture.
        quan_id = 0
        best_quan_reward = -1
        logger.info('=' * 50 + "Start exploring quantization space" +
                    '=' * 50)
        quan_agent = Agent(QUAN_SPACE,
                           args.layers,
                           lr=args.learning_rate,
                           device=torch.device('cpu'),
                           skip=False)
        for e2 in range(args.episodes2):
            quan_id += 1
            quan_rollout, quan_paras = quan_agent.rollout()
            # Reward is validation accuracy only if the quantized design
            # meets the FPGA LUT/throughput constraints; otherwise 0.
            fpga_model = FPGAModel(rLUT=args.rLUT,
                                   rThroughput=args.rThroughput,
                                   arch_paras=arch_paras,
                                   quan_paras=quan_paras)
            if fpga_model.validate():
                _, quan_reward = backend.fit(model,
                                             optimizer,
                                             val_data=val_data,
                                             quan_paras=quan_paras,
                                             epochs=1,
                                             verbosity=args.verbosity)
            else:
                quan_reward = 0
            logger.info(
                "Sample Quantization ID: {}, Sampled Quantization: {}, reward: {}"
                .format(quan_id, quan_rollout, quan_reward))
            quan_agent.store_rollout(quan_rollout, quan_reward)
            # Track the best quantization found for this architecture.
            # NOTE(review): best_quan_rollout/paras stay unbound if every
            # inner episode scores <= -1, which cannot happen since rewards
            # are >= 0 here.
            if quan_reward > best_quan_reward:
                best_quan_reward = quan_reward
                best_quan_rollout, best_quan_paras = quan_rollout, quan_paras
        logger.info('=' * 50 + "Quantization space exploration finished" +
                    '=' * 50)
        # The architecture's reward is its best achievable quantized reward.
        arch_reward = best_quan_reward
        arch_agent.store_rollout(arch_rollout, arch_reward)
        end = time.time()
        ep_time = end - start
        total_time += ep_time
        best_arch.register(
            arch_id,
            utility.combine_rollout(arch_rollout, best_quan_rollout,
                                    args.layers), arch_reward)
        # NOTE(review): fpga_model here is whichever inner sample ran last,
        # not necessarily the best one — confirm this is intended.
        writer.writerow([arch_id] + [
            str(arch_paras[i]) + '\n' + str(best_quan_paras[i])
            for i in range(args.layers)
        ] + [arch_reward] + list(fpga_model.get_info()) + [ep_time])
        # Log strings below keep the original spellings ("Elasped", "sapce").
        logger.info(f"Reward: {arch_reward}, " + f"Elasped time: {ep_time}, " +
                    f"Average time: {total_time/(e1+1)}")
        logger.info(f"Best Reward: {best_arch.reward_list[0]}, " +
                    f"ID: {best_arch.id_list[0]}, " +
                    f"Rollout: {best_arch.rollout_list[0]}")
    logger.info('=' * 50 +
                "Architecture & quantization sapce exploration finished" +
                '=' * 50)
    logger.info(f"Total elasped time: {total_time}")
    logger.info(f"Best samples: {best_arch}")
    csvfile.close()
# Command-line interface for the zero-configuration modeling script:
# takes a pandas-HDF5 data file and an optional YAML parameter file.
parser = argparse.ArgumentParser(
    description='zero configuration predictic modeling script. Requires a pandas HDFS dataframe file ' + \
    'and a yaml parameter file as input as input')
parser.add_argument(
    '-d',
    '--data_file',
    nargs=1,
    help=
    'input pandas HDFS dataframe .h5 with an unique indentifier and a target column\n' +
    'as well as additional data columns\n'
    'default values are cust_id and category or need to be defined in an\n' +
    'optional parameter file ')
parser.add_argument('-p', '--param_file', help='input yaml parameter file')
args = parser.parse_args()
# Log the invocation so runs are traceable.
logger = utl.get_logger(os.path.basename(__file__))
logger.info("Program started with the following arguments:")
logger.info(args)
###########################################################
# set dir to project dir
###########################################################
# Change the working directory to the project root (the parent of this
# script's directory) so relative paths resolve consistently.
abspath = os.path.abspath(__file__)
dname = os.path.dirname(os.path.dirname(abspath))
os.chdir(dname)
###########################################################
# file check for the parameter
###########################################################
param_file = ''
if args.param_file:  # NOTE(review): body of this branch continues beyond this excerpt
# Syncs market groups/types from a remote API into DynamoDB (PynamoDB models).
from json import JSONDecodeError
from time import sleep
import requests
from pynamodb.exceptions import QueryError, PutError, VerboseClientError
from constants import DEFAULT_PARAMS, MARKET_GROUPS_URI, MARKET_TYPES_URI
from models.market_group import MarketGroup
from models.market_type import MarketType
from utility import get_logger

log = get_logger()
# Ensure both tables exist before writing (create_table is a no-op if present).
MarketGroup.create_table(read_capacity_units=1, write_capacity_units=1)
MarketType.create_table(read_capacity_units=1, write_capacity_units=1)
# The groups endpoint returns a JSON list of market-group ids.
market_groups_response = requests.get(MARKET_GROUPS_URI, params=DEFAULT_PARAMS)
market_group_ids = market_groups_response.json()
for market_group_id in market_group_ids:
    market_group_exists = False
    try:
        market_group_exists = MarketGroup.count(market_group_id) != 0
    except (QueryError, KeyError) as e:
        # Treat lookup failures as "does not exist" and fall through.
        # NOTE(review): the exception is swallowed without logging — confirm
        # this best-effort behavior is intended.
        pass
    market_group_uri = MARKET_GROUPS_URI + str(market_group_id) + '/'
    try:
        # Fetch the group's detail record.
        # NOTE(review): the matching except clause continues beyond this excerpt.
        market_group_details = requests.get(market_group_uri,
                                            params=DEFAULT_PARAMS).json()
# Name of the target TARGET = 'target' # Params lgb_params = { 'objective': 'binary', 'boosting_type': 'gbdt', 'metric': METRIC, 'num_threads': N_THREADS, 'verbose': VERBOSE, 'seed': SEED, 'n_estimators': N_ESTIMATORS, 'early_stopping_rounds': EARLY_STOPPING_ROUNDS } logger = utility.get_logger(LOGGER_NAME, MODEL_NUMBER, run_id, LOG_DIR) utility.set_seed(SEED) logger.info(f'Running for Model Number {MODEL_NUMBER}') utility.update_tracking(run_id, "model_number", MODEL_NUMBER, drop_incomplete_rows=True) utility.update_tracking(run_id, "model_type", MODEL_TYPE) utility.update_tracking(run_id, "is_test", IS_TEST) utility.update_tracking(run_id, "n_estimators", N_ESTIMATORS) utility.update_tracking(run_id, "early_stopping_rounds", EARLY_STOPPING_ROUNDS) utility.update_tracking(run_id, "random_state", SEED) utility.update_tracking(run_id, "n_threads", N_THREADS) #utility.update_tracking(run_id, "learning_rate", LEARNING_RATE)
"""Provides the methods used for parsing HTML""" import urllib3 import certifi from utility import get_logger HTTP = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where()) LOGGER = get_logger('parser') def get_page_html(url): """Obtains the HTML from a URL. Returns the raw HTML for a given web page URL. Parameters ---------- url: str The URL of the web page that should be downloaded. Returns ------- HTML The raw HTML of the downloaded webpage. """ try: request = HTTP.request('GET', url) if request.status == 200: return request.data LOGGER.debug('ParserError: The request failed for %s', url) return None