def setUpClass(self):
    """Point a Config instance at the bundled test data and build a Recommender."""
    overrides = (
        ("popcon_index", "test_data/.sample_pxi"),
        ("popcon_dir", "test_data/popcon_dir"),
        ("clusters_dir", "test_data/clusters_dir"),
        ("popcon", 0),
    )
    test_config = Config()
    for attribute, value in overrides:
        setattr(test_config, attribute, value)
    # NOTE(review): the Config instance is not handed to Recommender;
    # presumably Config shares state between instances -- confirm.
    self.rec = Recommender()
class PyToApk(object):
    """Parse command-line options into a Config and run a named command."""

    # Class-level defaults; both are replaced per instance in __init__.
    config = None
    commandArgs = None

    def __init__(self, cmdArgs):
        """Build the configuration from this file's directory and ``cmdArgs``.

        Args:
            cmdArgs: parsed command-line object; it is passed to
                ``Config.parseCmdArgs`` and its ``commandArgs`` attribute
                is kept for the command that is run later.
        """
        super(PyToApk, self).__init__()
        self.config = Config(os.path.dirname(os.path.realpath(__file__)))
        self.config.parseCmdArgs(cmdArgs)
        self.commandArgs = cmdArgs.commandArgs

    def executeTask(self, task):
        """Run the command module ``src.commands.<task>``.

        Args:
            task: name of the module under ``src.commands`` to execute.

        Returns:
            ``False`` when the configuration does not validate, when the
            command is interrupted, or when it raises an ``Exception``;
            otherwise whatever the command's ``run`` returns.
        """
        if not self.config.validateValues():
            return False

        # __import__ returns the top-level ``src`` package, so walk down to
        # ``src.commands.<task>`` by attribute access.
        command = getattr(getattr(__import__('src.commands.' + task), 'commands'), task)

        success = False
        try:
            success = command.run(self.config, self.commandArgs)
        except KeyboardInterrupt:
            self.config.logger.error('Cancelling build due to interrupt.')
        except Exception as e:
            import traceback
            self.config.logger.error('Caught exception: ' + str(e))
            output = self.config.logger.getOutput()
            # Tracebacks go to stderr when the logger writes to stdout.
            output = sys.stderr if output == sys.stdout else output
            traceback.print_exception(*sys.exc_info(), file=output)
        # Returning here rather than from a ``finally`` clause keeps
        # exceptions that are not handled above (e.g. SystemExit) from being
        # silently discarded.
        return success
def __init__(self, name):
    """Create the employee called ``name`` and load its stored encoding.

    File locations are obtained from a fresh ``Config`` instance.
    """
    self.name = name
    self.conf = Config()

    # Per-employee file locations as reported by the configuration.
    conf = self.conf
    self.img_path = conf.employee_img_path(name)
    self.encoded_img_path = conf.employee_encoded_img_path(name)
    self.data_path = conf.employee_data_path(name)

    # Runtime state starts out empty.
    self.encoded_face = None
    self.data = {}
    self.in_timestamp = None
    self.last_timestamp = None

    self.load_encoding()
    log.info("Employee named {} was loaded".format(self.name))
def btn_corner(self):
    """
    Handle a click on one of the buttons of the corner button group.

    The object name of the checked button is stored in the configuration
    under "mainwindow/position" and the window is moved to that position.
    """
    pos = self.ui.group_corner.checkedButton().objectName()

    # Persist the new position
    Config.set("mainwindow/position", pos)

    # Move the window
    QtCore.qApp.window.set_window_position(pos)
def load_config(mode=None):
    """Parse the command line, prepare the checkpoint folder and load the config.

    Args:
        mode: 1 for training, 2 for testing, 3 for evaluation. Any other
            value leaves ``config.MODE`` untouched. Mode 2 additionally
            accepts an ``--output`` option.

    Returns:
        The ``Config`` loaded from ``<path>/config.yaml``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", "--checkpoints", type=str, default="./checkpoints2",
                        help="model checkpoint path, default = ./checkpoints2")
    parser.add_argument(
        "--model", type=int, choices=[1, 2, 3],
        help="1: edge model, 2: SR model, 3: joint SR model with edge enhancer"
    )
    parser.add_argument("--train_img_path", type=str, default="./train_images")
    parser.add_argument("--test_img_path", type=str, default="./test_images")
    parser.add_argument("--eval_img_path", type=str, default="./eval_images")
    if mode == 2:
        parser.add_argument("--output", type=str, help="path to a output folder")
    args = parser.parse_args()

    create_data_list(args.train_img_path, args.test_img_path, args.eval_img_path, "./list_folder")

    # exist_ok avoids failing when the directory appears between a check and
    # the creation (e.g. several jobs started at once).
    os.makedirs(args.path, exist_ok=True)

    # Seed the checkpoint folder with the template config on first use.
    config_path = os.path.join(args.path, "config.yaml")
    if not os.path.exists(config_path):
        copyfile('./config.yaml', config_path)

    config = Config(config_path)

    # 1: train, 2: test, 3: eval
    if mode in (1, 2, 3):
        config.MODE = int(mode)

    return config
def start_mlflow(config: Config) -> Tuple[str, str]:
    """Start an MLflow run in the experiment named ``config.exp_name``.

    The experiment is created when it does not exist yet, the run name is
    read from standard input, and ``config.log_mlflow_params()`` is called
    once the run has started.

    Returns:
        The experiment id and the run name that was entered.
    """
    # Best effort: close a run that may still be active.
    try:
        mlflow.end_run()
    except Exception:
        pass

    experiment_missing = mlflow.get_experiment_by_name(config.exp_name) is None
    if experiment_missing:
        mlflow.create_experiment(config.exp_name)

    experiment = mlflow.get_experiment_by_name(config.exp_name)
    experiment_id: str = experiment.experiment_id

    print("put the run name")
    run_name: str = input()

    mlflow.start_run(experiment_id=experiment_id, run_name=run_name)
    config.log_mlflow_params()
    return experiment_id, run_name
def __init__(self, asset):
    """Set up the health bar images and place them near the right screen edge.

    Args:
        asset: nested mapping; the images are read from
            ``asset["other"]["healthbar"]`` and ``asset["other"]["health"]``.
    """
    self.config = Config()
    self.config.read()
    self.value = 200

    other_assets = asset["other"]
    self.healthbar = other_assets["healthbar"]
    self.health = other_assets["health"]

    screen_width = self.config.getRect.width

    self.healthbar_rect = self.healthbar.get_rect()
    self.healthbar_rect.right = screen_width - 40

    self.health_rect = self.health.get_rect()
    self.health_rect.right = screen_width - 43
    # NOTE(review): 43 is subtracted a second time here -- confirm this
    # extra offset is intended.
    self.health_rect.right -= 43
def move(self, direction, check_collision: bool = True) -> None:
    """Move in the direction of an arrow key unless already at that edge.

    Args:
        direction: a pygame key constant (``K_UP``, ``K_RIGHT``, ``K_DOWN``
            or ``K_LEFT``); other values do nothing.
        check_collision: forwarded unchanged to the directional method.
    """
    if direction == pygame.K_UP:
        if self.rect.top > 0:
            self.up(check_collision)

    if direction == pygame.K_RIGHT:
        # The right/bottom limits come from the configured screen size.
        if self.rect.right < Config().width:
            self.right(check_collision)

    if direction == pygame.K_DOWN:
        if self.rect.bottom < Config().height:
            self.down(check_collision)

    if direction == pygame.K_LEFT:
        if self.rect.left > 0:
            self.left(check_collision)
def load_config():
    r"""Load the model config selected on the command line.

    Command-line options:
        --config    configuration file name (default ``config_example.json``)
        --loadbest  1 to load the best model, 0 to load checkpoints
        --mode      one of ``train``, ``trace``, ``eval`` (required)

    When the loaded config has no ``NAME`` entry, one is derived from the
    config file name: the first ``len('config_')`` characters and the
    extension are dropped and the characters ``!@#$`` are removed.

    Returns:
        The loaded ``Config`` with ``LOADBEST`` and ``MODE`` set from the
        command line.

    Raises:
        RuntimeError: if the config file does not exist.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # The formatter appends the real default, so the help text must not
    # state one itself.
    parser.add_argument(
        '--config',
        type=str,
        default='config_example.json',
        help='configuration file name. Relative path under given path')
    parser.add_argument(
        '--loadbest',
        type=int,
        default=0,
        choices=[0, 1],
        help='1: load best model or 0: load checkpoints. Only works in non training mode.')
    parser.add_argument('--mode',
                        type=str,
                        choices=['train', 'trace', 'eval'],
                        help='mode. can be [train,trace,eval]',
                        required=True)

    args = parser.parse_args()
    config_path = os.path.abspath(args.config)

    if not os.path.exists(config_path):
        # The message is built with str.format; the previous ``&`` operator
        # raised a TypeError before the RuntimeError could be created.
        raise RuntimeError(
            'Target config file does not exist. {}'.format(config_path))

    # load config file
    config = Config(config_path)

    if 'NAME' not in config:
        config_name = os.path.basename(args.config)
        if len(config_name) > len('config_'):
            name = config_name[len('config_'):]
            name = os.path.splitext(name)[0]
            translation_table = dict.fromkeys(map(ord, '!@#$'), None)
            name = name.translate(translation_table)
            config['NAME'] = name

    config.LOADBEST = args.loadbest
    config.MODE = args.mode

    return config
def main():
    """Connect to a device, report its properties and disable its protection.

    An optional ``-c/--config`` file supplies the device configuration;
    otherwise the default configuration for the detected hardware code is
    used.

    Raises:
        RuntimeError: if the given config file, the default config or the
            payload file named by the configuration does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="Device config")
    arguments = parser.parse_args()

    if arguments.config:
        if not os.path.exists(arguments.config):
            raise RuntimeError("Config file {} doesn't exist".format(arguments.config))
    elif not os.path.exists(DEFAULT_CONFIG):
        raise RuntimeError("Default config is missing")

    device = Device().find()
    device.handshake()

    hw_code = device.get_hw_code()
    hw_sub_code, hw_ver, sw_ver = device.get_hw_dict()
    secure_boot, serial_link_authorization, download_agent_authorization = device.get_target_config()

    if arguments.config:
        # The context manager closes the file even if parsing fails.
        with open(arguments.config) as config_file:
            config = Config().from_file(config_file, hw_code)
    else:
        config = Config().default(hw_code)

    payload_path = PAYLOAD_DIR + config.payload
    if not os.path.exists(payload_path):
        raise RuntimeError("Payload file {} doesn't exist".format(payload_path))

    print()
    log("Device hw code: {}".format(hex(hw_code)))
    log("Device hw sub code: {}".format(hex(hw_sub_code)))
    log("Device hw version: {}".format(hex(hw_ver)))
    log("Device sw version: {}".format(hex(sw_ver)))
    log("Device secure boot: {}".format(secure_boot))
    log("Device serial link authorization: {}".format(serial_link_authorization))
    log("Device download agent authorization: {}".format(download_agent_authorization))
    print()

    log("Disabling watchdog timer")
    device.write32(config.watchdog_address, 0x22000064)

    if serial_link_authorization or download_agent_authorization:
        log("Disabling protection")
        # The payload handle is released even if the exploit raises.
        with open(payload_path, "rb") as payload:
            exploit(device, config.watchdog_address, config.var_0, config.var_1, payload)
        log("Protection disabled")
def test_highest_config_version_chosen():
    """With ver_1, ver_10 and ver_2 stored, Config must report 'ver_10'."""
    stored_versions = ('ver_1', 'ver_10', 'ver_2')
    patch_target = 'src.config.Config.get_configs_from_dynamodb'

    with mock.patch(patch_target) as mock_dynamo:
        mock_dynamo.return_value = [
            create_config(version=json.dumps(version))
            for version in stored_versions
        ]
        config = Config(
            region_name='some-aws-region',
            table_name='some-dynamodb-table',
        )
        assert config.get_version() == 'ver_10'
def testBasic(self):
    """A wrong config file is rejected and the example config yields the expected values."""
    with self.assertRaises(ValueError):
        _ = Config(self.config_dir / "wrong.yaml")

    config = Config(self.config_dir / "config-example.yaml")

    # Notifier section
    notifier_config = config.get_notifier_config()
    expected_pushover = (
        ("enable", False),
        ("api_token", "dummy_token"),
        ("user_key", "dummy_key"),
    )
    for key, expected in expected_pushover:
        self.assertEqual(notifier_config["pushover"][key], expected)

    # Chia logs section
    chia_logs_config = config.get_chia_logs_config()
    expected_consumer = (
        ("enable", True),
        ("file_path", "~/.chia/mainnet/log/debug.log"),
    )
    for key, expected in expected_consumer:
        self.assertEqual(chia_logs_config["file_log_consumer"][key], expected)
def evt_mouseDoubleClickEvent(self, event):
    """
    Move the window to the position stored under "mainwindow/position"
    in the configuration. The event itself is not inspected.
    """
    self.set_window_position(Config.get("mainwindow/position"))
def test_op_to_order():
    """Build a deposit operation and the TransactionDTO mirroring its fields."""
    cfg = Config()
    gateway_asset = f"{cfg.gateway_prefix}.{cfg.gateway_distribute_asset}"

    op_dto = BitSharesOperation(
        op_id=43571314,
        order_id=uuid4(),
        order_type=OrderType.DEPOSIT,
        asset=gateway_asset,
        from_account=cfg.account,
        to_account=testnet_user_account,
        amount=0.1,
        status=TxStatus.RECEIVED_NOT_CONFIRMED,
        confirmations=0,
        block_num=37899972,
        tx_created_at=datetime.datetime.now(),
        error=TxError.NO_ERROR,
    )

    # NOTE(review): nothing is asserted on ``tx`` in this test.
    tx_fields = dict(
        coin=op_dto.asset,
        amount=op_dto.amount,
        from_address=op_dto.from_account,
        to_address=op_dto.to_account,
        created_at=op_dto.tx_created_at,
        confirmations=op_dto.confirmations,
        max_confirmations=BITSHARES_NEED_CONF,
    )
    tx = TransactionDTO(**tx_fields)
def update(self, tick_data):
    """Process pending events, update world and HUD, and pick the next state.

    Args:
        tick_data: mutable mapping for this tick; its ``'debug'`` entry is
            set here before it is passed to the world and the HUD.

    Returns:
        ``'playing'`` unless an event selected ``'player_death'``,
        ``'map_transition'`` or ``'paused'``.
    """
    next_state = 'playing'

    for event in self.get_events():
        if event.type == PLAYER_DEATH:
            next_state = 'player_death'
        elif event.type == MAP_FINISHED:
            next_state = 'map_transition'
        elif event.type == KEYDOWN:
            if event.key == K_d:
                # Toggle the debug flag.
                config = Config.get_instance()
                config.debug = not config.debug
            if event.key == K_r:
                self.game.reset_world()
            # Pause only when no earlier event already changed the state.
            # Strings are compared by value; ``is`` only worked through
            # interning of literals.
            if event.key == K_ESCAPE and next_state == 'playing':
                next_state = 'paused'

    # NOTE(review): this reads ``self.debug`` while the key handler above
    # toggles ``config.debug`` -- confirm both refer to the same flag.
    tick_data['debug'] = self.debug
    self.game.world.update(tick_data)
    self.game.hud.update(tick_data)

    return next_state
def test_access_map_missing_ugly_uri():
    """Access to an ugly URI is denied for a config created without one."""
    patch_target = 'src.config.Config.get_configs_from_dynamodb'

    with mock.patch(patch_target) as mock_dynamo:
        stored_config = create_config(version=json.dumps('ver_42'))
        mock_dynamo.return_value = [stored_config]

        config = Config(
            region_name='some-aws-region',
            table_name='some-dynamodb-table',
        )
        is_allowed = config.access_is_allowed(
            user='******',
            ugly_uri='/some-ugly-path/ugly-resource',
        )
        assert is_allowed == False
def __init__(self, config: Config = None):
    """Set up per-site caches, the YouTube API client and the database handle.

    Args:
        config: configuration to use; when falsy, ``Config.load()`` is
            called instead.
    """
    self.config = config if config else Config.load()

    # Every per-site container below starts empty for each Site member.
    sites = list(Site.__members__.values())

    self.already_checked: dict[Site, set[BaseVideo]] = {site: set() for site in sites}
    # Dict of dicts in the form {site: {video_id: db_id}}
    self.all_vids: dict[Site, dict[str, int]] = {site: {} for site in sites}
    self.all_vid_ids: dict[Site, set[str]] = {site: set() for site in sites}
    self.channel_cache: dict[Site, set[BaseChannel]] = {site: set() for site in sites}
    self.db_channel_cache: dict[Site, set[str]] = {site: set() for site in sites}

    self._yt_api = YTApi(self.config.yt_token)
    self.all_tags: dict[str, int] = {}
    self.threads: list[threading.Thread] = []

    # Rows are returned as dicts through the dict_row row factory.
    self._conn = psycopg.connect(self.config.db_conn_string, row_factory=dict_row)
    self.db = DbUtils(self._conn)
def main():
    """Train the model described by the config file given on the command line.

    The random seed is fixed, the work directory can be overridden with
    ``args.work_dir``, and the training dataloader is reused for validation.
    A KeyboardInterrupt during training is reported and swallowed.
    """
    torch.manual_seed(12345)

    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    _logger = init_logger(cfg.work_dir, 'INFO')
    _logger.info(cfg)

    print('before init_process')
    init_process(cfg.dist_config)
    print('after init_process')

    print('before build_model')
    model = build_model(cfg.model)
    print('after build_model')

    print('before train_dataloader')
    train_dataloader = get_dataloader(cfg.data.train_data, cfg.data.train_dataloader)
    print('after train_dataloader')

    # Validation runs on the training data.
    val_dataloader = train_dataloader
    dataloaders = {'train': train_dataloader, 'val': val_dataloader}

    try:
        train_model(
            model,
            dataloaders,
            cfg,
        )
    except KeyboardInterrupt:
        print('KeyboardInterrupt')
    finally:
        # Tear the process group down on every exit path: normal
        # completion, interrupt, or a training error.
        dist.destroy_process_group()
def __init__(self):
    """Create the logger, an empty asset table and a freshly read configuration."""
    self.logs = Logs()

    # One empty dict per asset category; the icon entry starts as False.
    categories = ("backgrounds", "textures", "anim", "other", "sounds", "effects", "fonts")
    self.data = {category: {} for category in categories}
    self.data["icon"] = False

    self.config = Config()
    self.config.read()
def __init__(self, config_file, thread_count=1, timeout=60, seed=''):
    '''Store the crawl settings and create empty URL and target sets.

    Arguments:
        config_file     the file containing configuration data.
        thread_count    the number of worker threads to use.
        timeout         the time workers should wait on request.
        seed            a website for Scrappy to crawl.

    The configuration file is only remembered here, not read.
    '''
    # Settings supplied by the caller.
    self.config_file = config_file
    self.thread_count = thread_count
    self.timeout = timeout
    self.seed = seed

    # Fixed defaults.
    self.verbose = False
    self.saverate = 6

    self.config = Config()

    # Shared containers, all empty at start.
    self.visited_urls = SetWrapper(set())
    self.unvisited_urls = SetWrapper(set())
    self.targets = SetWrapper(set(), format="{},\n")
def __init__(self, tracker, data_container, processing, device=None, append_reverse=None, online_caching=None):
    """Build a dataset over the streamlines produced by ``tracker``.

    Args:
        tracker: provides ``get_streamlines()`` and an ``id``.
        data_container: forwarded to ``IterableDataset.__init__``.
        processing: stored in the options; its ``id`` becomes part of
            this dataset's id.
        device: forwarded to ``IterableDataset.__init__``.
        append_reverse: when ``None``, read from the "DatasetOptions"
            section of the configuration ("appendReverseStreamlines").
        online_caching: when ``None``, read from the same section
            ("onlineCaching"). A truthy value allocates the cache.
    """
    IterableDataset.__init__(self, data_container, device=device)
    self.streamlines = tracker.get_streamlines()
    self.id = self.id + "-{}-(".format(processing.id) + tracker.id + ")"

    config = Config.get_config()
    # NOTE(review): if this is a configparser object, the fallback is
    # returned as the string "yes" rather than True -- confirm.
    if append_reverse is None:
        append_reverse = config.getboolean("DatasetOptions", "appendReverseStreamlines", fallback="yes")
    if online_caching is None:
        online_caching = config.getboolean("DatasetOptions", "onlineCaching", fallback="yes")

    self.options = SimpleNamespace(
        append_reverse=append_reverse,
        online_caching=online_caching,
        processing=processing,
    )

    if online_caching:
        # One cache slot per dataset item.
        self.cache = [None] * len(self)
    self.feature_shapes = None
def test_run_reports_handles_error():
    """One failing and one succeeding report: each is reported through its own callback."""
    failing_report = ReportConfig("nonexistent", None, ["*****@*****.**"], "subject1", 600, "a.ssignee")
    working_report = ReportConfig("diagnostic", {}, ["*****@*****.**"], "subject2", 600, "a.ssignee")
    reports = [failing_report, working_report]

    config = Config()
    wrapper = OrderlyWebClientWrapper(config)

    # Callback results, keyed by report name.
    success, error = {}, {}

    def success_callback(report, version):
        success[report.name] = version

    def error_callback(report, message):
        error[report.name] = message

    running_reports_repository = RunningReportsRepository()
    versions = run_reports(wrapper, "testGroup", "testDisease", "testTouchstone", config, reports,
                           success_callback, error_callback, running_reports_repository)

    keys = list(versions.keys())
    assert len(keys) == 1
    only_version = keys[0]
    assert versions[only_version]["published"] is True

    assert success["diagnostic"] == only_version
    assert "Failure for key" in error["nonexistent"]
    assert len(success) == 1
    assert len(error) == 1
def config():
    """Return a Config parsed from a fixed command line.

    Note that ``sys.argv`` is overwritten as a side effect.
    """
    options = (
        ('--dataset', 'snips'),
        ('--metric', 'l2'),
        ('--n_way_train', '5'),
        ('--n_way_validation', '12'),
        ('--k_shot_train', '5'),
        ('--k_shot_validation', '12'),
    )
    # The first entry stands in for the program name.
    argv = ['']
    for flag, value in options:
        argv.extend((flag, value))
    sys.argv = argv
    return Config().parse_args(known_only=True)
def __init__(
    self,
    cv_num: int,
    config: Config,
    start_time: Optional[str],
    mlflow_on: bool = True,
):
    """Prepare model, loss meter, optimizer and scheduler for one fold.

    Args:
        cv_num: stored as ``self.cv_num``.
        config: supplies ``WORK_DIR``, ``lr``, ``scheduler_class`` and
            ``scheduler_params``.
        start_time: names the log directory under ``<WORK_DIR>/output``;
            the directory is only created when this is not ``None``.
        mlflow_on: stored as ``self.mlflow_on``.
    """
    self.mlflow_on = mlflow_on
    self.config: Config = config
    self.epoch: int = 0
    self.cv_num: int = cv_num
    self.start_time = start_time

    output_root = f"{self.config.WORK_DIR}/output"
    self.log_path: str = f"{self.config.WORK_DIR}/output/{self.start_time}"
    if start_time is not None:
        # Clear out empty directories first, then create this run's own.
        remove_empty_dirs(output_root)
        Path(self.log_path).mkdir(parents=True, exist_ok=True)

    # Large initial value for the best loss seen so far.
    self.best_summary_loss: float = 10 ** 5

    self.train_model = self.load_train_model()
    self.loss_fn = get_average_meter()

    trainable_parameters = self.train_model.parameters()
    self.optimizer: torch.optim.Optimizer = torch.optim.AdamW(
        trainable_parameters, lr=config.lr
    )
    self.scheduler = config.scheduler_class(
        self.optimizer, **config.scheduler_params
    )
def get_word_id(self, word):
    """Return the id of ``word``, or the id of UNKNOWN_TOKEN when it is not in the vocabulary.

    The word is normalised with ``Config.word_processing`` before lookup.
    """
    processed_word = Config.word_processing(word)
    lookup_key = processed_word if processed_word in self.word_to_idx else UNKNOWN_TOKEN
    return self.word_to_idx[lookup_key]
def build_final(self) -> None:
    """Aggregate data to mail objects.

    Three aggregation passes run with progressively fewer id maps. Each
    pickup mail with no repository entry for its IMAP queue id is then
    post-processed and, unless marked for deletion, processed as an
    aggregated mail. Finally the repository indexes are created.
    """
    self.__build_final_metadata = {}

    self.__aggregate_mails(
        [self._map_qid_mxin, self._map_qid_imap, self._map_msgid],
        [constants.PHD_MXIN_QID, constants.PHD_IMAP_QID, constants.MESSAGEID],
    )
    self.__aggregate_mails(
        [self._map_qid_imap, self._map_msgid],
        [constants.PHD_IMAP_QID, constants.MESSAGEID],
    )
    self.__aggregate_mails([self._map_msgid], [constants.MESSAGEID])

    for pickup_qid, mail in self._map_pickup.items():
        builder = ExpressionBuilder()
        builder.add_field(
            ExpressionField(constants.PHD_IMAP_QID, pickup_qid, Comparator.equal))
        matches = self._repository.find(builder.expression, SearchScope.ALL)
        # Skip mails the repository already knows about.
        if len(matches) != 0:
            continue
        if self.__postprocessing(mail) != ProcessorAction.DELETE:
            self.__process_aggregated_mail(mail)
    self._map_pickup.clear()

    # create indexes in repository
    self._repository.create_indexes(self.__fieldsToIndex)

    if Config().get('printmsgs'):
        print('')
def mutate(self, chance=None):
    """Mutate the chromosome.

    The mutable genes are shuffled in place and each one is passed to
    ``mutate_gene`` with probability ``chance``.

    Args:
        chance: per-gene mutation probability. When omitted, the current
            ``Config.instance().mutation_chance`` is used. It is read at
            call time, so the value is not frozen when the function is
            defined and importing this module no longer touches Config.
    """
    if chance is None:
        chance = Config.instance().mutation_chance

    genes = self.genotype.mutable_genes
    random.shuffle(genes)
    for gene in genes:
        if random.random() < chance:
            self.mutate_gene(gene)
def check_load(args):
    """
    Verify the model directory and its weight files, then load the config.

    ``args.path`` must exist and contain at least one ``EdgeModel_gen*.pth``
    and one ``InpaintingModel_gen*.pth`` file. A missing ``config.yml`` is
    created from ``./config.yml.example`` before it is loaded.

    Raises NotADirectoryError when the path does not exist and
    FileNotFoundError when a weights file is missing.
    """
    model_dir = args.path
    if not os.path.exists(model_dir):
        raise NotADirectoryError('Path <' + str(model_dir) + '> does not exist!')

    edge_weights = glob.glob(os.path.join(model_dir, 'EdgeModel_gen*.pth'))
    if not edge_weights:
        raise FileNotFoundError(
            'Weights file <EdgeModel_gen*.pth> cannot be found under path: ' + model_dir)

    inpaint_weights = glob.glob(os.path.join(model_dir, 'InpaintingModel_gen*.pth'))
    if not inpaint_weights:
        raise FileNotFoundError(
            'Weights file <InpaintingModel_gen*.pth> cannot be found under path: ' + model_dir)

    # copy the config template if it doesn't exist yet
    config_path = os.path.join(model_dir, 'config.yml')
    if not os.path.exists(config_path):
        shutil.copyfile('./config.yml.example', config_path)

    # load config file
    return Config(config_path)
def main_coco():
    """Evaluate a trained orientation model on a validation set.

    Loads the figures dataset, runs inference, then writes the evaluation,
    the predicted annotations and the result visualisation to the
    ``evaluation`` directory.
    """
    # Alternative validation sets:
    #   "data/Definitive/Hexagon/val", "data/Definitive/Cube/val",
    #   "data/Definitive/Octahedron/val", "data/Definitive/Needle/val"
    val_path = "data/Random/Cube/val"

    # Alternative weights:
    #   "logs/Final/Hexagon/orientations_2900.h5",
    #   "logs/Final/Cube/orientations_2900.h5",
    #   "logs/Final/Needle/orientations_3000.h5",
    #   "logs/Final/Octahedron/orientations_2900.h5"
    weights = "logs/Random/Cube/orientations_1200.h5"

    evaluation_dir = "evaluation"
    config = Config()

    # Validation data
    dataset_val = FiguresDataset()
    dataset_val.load_figures(val_path, "val_annotations.json")
    dataset_val.prepare()
    val_images, val_orientations, _ = load_figures_data(dataset_val, config, mask=False)

    # Model and weights
    or_model = model.OrientationModel("logs", config)
    or_model.compile(weights)

    # Inference
    predictions = detect.detect(or_model, val_images)

    # Ground truth is given as ZYX Euler angles in degrees; convert to matrices.
    gt_rotations = R.from_euler('ZYX', val_orientations, degrees=True)
    gt_orientations = gt_rotations.as_matrix()

    utils.evaluate(gt_orientations, predictions, dataset_val, evaluation_dir)
    coco_data.save_pred_annotations(predictions, dataset_val, val_path, evaluation_dir)
    visualize.show_results(val_images, predictions, evaluation_dir)
def schema2db():
    """Create PostgreSQL tables for every JSON schema in the schemas directory.

    Files are processed in sorted name order. A file that is not valid
    JSON is reported and skipped. The database schema name is the file
    name with up to two trailing dot-separated parts removed. All changes
    are committed once after the last file, and the connection is closed
    even when an error occurs.
    """
    load_dotenv(verbose=True)
    config = Config()

    schema_file_names = sorted(os.listdir(config.schemas_dir))

    con = psycopg2.connect(
        f'host={config.db_host} dbname={config.db_name} user={config.db_user} password={config.db_password}'
    )
    try:
        for schema_file_name in schema_file_names:
            schema_path = os.path.join(config.schemas_dir, schema_file_name)
            try:
                # The context manager closes the file handle promptly.
                with open(schema_path) as schema_file:
                    json_schema = json.load(schema_file)
            except JSONDecodeError:
                print("Failed to decode file as JSON. File:", schema_file_name)
                continue

            db_schema_name = schema_file_name.rsplit('.', 2)[0]
            print("db_schema_name =", db_schema_name)

            translator = JSONSchemaToPostgres(json_schema,
                                              postgres_schema=db_schema_name,
                                              debug=True)
            translator.create_tables(con)
        con.commit()
    finally:
        con.close()
def create_app(config_class=Config):
    """Application factory: build and configure the Flask app.

    Args:
        config_class: configuration class (instantiated here) or an
            already built configuration object. Defaults to ``Config``.

    Returns:
        The Flask application with extensions initialised, blueprints
        registered and database tables created.
    """
    app = Flask(__name__)

    # Honour the argument instead of always using Config; accept both a
    # class and a ready-made instance.
    settings = config_class() if isinstance(config_class, type) else config_class
    app.config.from_object(settings)

    import src.model
    import src.model.users

    # initialise extensions
    model.db.init_app(app)
    admin.init_app(app)
    login_manager.init_app(app)

    # blueprints
    from src.auth.view import auth
    from src.home.view import home
    from src.student.view import student
    from src.errors.handlers import errors

    app.register_blueprint(auth)
    app.register_blueprint(home)
    app.register_blueprint(student)
    app.register_blueprint(errors)

    with app.app_context():
        model.db.create_all()

    return app
def __init__(self):
    """Acquire config, task handler, logger and event loop, then flag the probe."""
    # Collaborators
    self.__config = Config.get_config_instance()
    self.__task_handler = Handler()
    self.log = Logger.get_logger_instance()

    # Event loop used by this service
    self.loop = asyncio.get_event_loop()

    # Report readiness and liveness once construction has succeeded.
    probe.readiness = True
    probe.liveness = True
def main():
    """Validate a checkpoint on AgeDB-30, CFP-FP and LFW.

    For every dataset the accuracy and best threshold are printed and a
    ROC curve is written to ``tools/public_val``.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)
    cfg.batch_size = 1000

    agedb_30, cfp_fp, lfw, agedb_30_issame, cfp_fp_issame, lfw_issame = get_val_data(
        args.data_root)

    model = build_base_model(cfg.model.base_model).to('cuda')
    checkpoint = torch.load(args.ckpt_path)
    load_state_dict(model, checkpoint['state_dict'])

    def evaluate(carray, issame, plot_path):
        """Run validation on one dataset, save its ROC curve, return (acc, threshold)."""
        acc, best_thresh, tpr, fpr = val(
            cfg, model, carray, issame, our_normalization=args.our_norm)
        gen_plot(fpr, tpr, plot_path)
        return acc, best_thresh

    agedb_acc, agedb_best_thresh = evaluate(
        agedb_30, agedb_30_issame, 'tools/public_val/agedb_roc_curve.jpg')
    print('agedb_acc={}, agedb_best_thresh={}'.format(agedb_acc, agedb_best_thresh))

    # NOTE(review): the file name says "cfg" although this is the CFP
    # dataset -- confirm whether "cfp_roc_curve.jpg" was intended.
    cfp_acc, cfp_best_thresh = evaluate(
        cfp_fp, cfp_fp_issame, 'tools/public_val/cfg_roc_curve.jpg')
    print('cfp_fp_acc={}, cfp_best_thresh={}'.format(cfp_acc, cfp_best_thresh))

    lfw_acc, lfw_best_thresh = evaluate(
        lfw, lfw_issame, 'tools/public_val/lfw_roc_curve.jpg')
    print('lfw_acc={}, lfw_best_thresh={}'.format(lfw_acc, lfw_best_thresh))
def main():
    """Load the daemon configuration named on the command line and run the daemon.

    Exits with status 0 after the daemon returns.
    """
    # Python 2 idiom: force UTF-8 as the default string encoding.
    reload(sys)
    sys.setdefaultencoding('utf8')

    parser = argparse.ArgumentParser(description="Processing of messages protobuf")
    parser.add_argument('CONFIG_DAEMON', type=str)

    try:
        config_file = parser.parse_args().CONFIG_DAEMON
    except argparse.ArgumentTypeError:
        print("Bad usage, learn how to use me with %s -h" % sys.argv[0])
        sys.exit(1)

    config_data = Config()
    config_data.load(config_file)

    Daemon(config_data).run()
    sys.exit(0)
class Environment:
    """
    Benchmark environment.
    Keeps the configuration, the list of benchmark files and the collected results.
    """

    def __init__(self):
        """Start with an unread configuration, no files and an empty result set."""
        self.config = Config()
        self.files = []
        self.file_writer = None
        self.results = RunResult()
        self.analyzer = None

    def exec(self, config_file_name):
        """Read the configuration, collect the files, print an overview and run everything."""
        self.init_env(config_file_name)
        self.collect_test_files()
        self.print_info()
        self.run_processes()

    def init_env(self, config_file):
        """Read the given configuration file into ``self.config``."""
        self.config.read_config_file(config_file)

    def analyze(self):
        """Write the result files and print a summary of the collected results."""
        self.analyzer = ResultProcessor(self.results, self.config)
        self.analyzer.write_result_files()
        self.analyzer.print_summary()

    def print_info(self):
        """Print the configuration followed by the file overview."""
        self.config.print()
        self.print_file_list()

    def collect_test_files(self):
        """
        Walks the test folder recursively and records every .sil file whose
        path does not end with an entry of the ignore list.
        :return: None
        """
        ignored_suffixes = tuple(self.config.ignoreList)
        for root, _dirs, names in os.walk(self.config.testFolder):
            for name in names:
                if not name.endswith('.sil'):
                    continue
                path = os.path.join(root, name)
                if path.endswith(ignored_suffixes):
                    continue
                self.files.append(os.path.normpath(path))

    def print_file_list(self):
        """Print the number of benchmark files and, if configured, each file."""
        print(str(len(self.files)) + " file(s) included in the benchmark.")
        if self.config.list_files:
            for path in self.files:
                print(" " + path)
        print()

    def run_processes(self):
        """
        Runs every file under every run configuration and stores the timings.
        :return: None
        """
        settings = self.config
        total = len(self.files) * len(settings.run_configurations) * settings.repetitions
        # Running index of the first repetition of the current file/config pair.
        progress = 1
        for file in self.files:
            named_configs = zip(settings.run_configurations, settings.run_config_names)
            for run_config, config_name in named_configs:
                runner = ProcessRunner(run_config, file, config_name, self.config)
                self.results.add_results(runner.run(progress, total))
                progress += self.config.repetitions
def __init__(self, cmdArgs):
    """Build the configuration from this file's directory and ``cmdArgs``.

    ``cmdArgs.commandArgs`` is kept for the command that is run later.
    """
    super(PyToApk, self).__init__()

    # The directory containing this file is handed to Config.
    module_path = os.path.realpath(__file__)
    self.config = Config(os.path.dirname(module_path))
    self.config.parseCmdArgs(cmdArgs)

    self.commandArgs = cmdArgs.commandArgs
def __init__(self):
    """Start with an unread configuration, no files and an empty result set."""
    # Not created yet.
    self.file_writer = None
    self.analyzer = None

    self.files = []
    self.config = Config()
    self.results = RunResult()
def start():
    '''Start the application and poll the server in an endless loop.'''
    log = configure_log()
    log.info('Starting Cloud Worker Node Agent')
    log.info('--------------------------')

    args = parse_args()
    settings = dict(
        base_url=args.server,
        secret=args.secret,
        client_id=C.CLIENT_ID,
        client_secret=C.CLIENT_SECRET,
        username=C.USERNAME,
        password=C.PASSWORD,
    )
    server = Server(settings)
    node = Node(server)

    # Register the node (hostname, ip etc.) and mark it ready
    node.send_info()
    node.update_node_status(C.STATUS_READY)

    # Helpers that talk to the server on behalf of this node
    config = Config(server, node)
    actions = Action(server, node)
    processor = Processor()
    workers = Worker(server, node, processor)
    output = Output(server, node)

    # There is no exit condition; the loop runs until the process is stopped.
    while True:
        log.info('Looping')
        log.info('--------------------------')

        # Update last seen date, then pull the latest config
        node.update_node_date()
        config.refresh()

        # Fetch and answer pending actions
        num_pending = actions.get_pending()
        if actions.has_pending():
            output.send('Responding to %d Actions ...' % num_pending)
            actions.respond_to_pending()

        # Fetch and process workers/commands
        workers.refresh()
        workers.process_workers()

        #TODO
        #Respond to/run commands
        #Send output to server

        log.info('Sleeping for %d seconds ...', config.get(C.CONFIG_POLL_PERIOD))
        time.sleep(config.get(C.CONFIG_POLL_PERIOD))
class Scrappy(object):
    '''Scrappy is an object that represents the spider itself.

    The main run method will block the current thread. The work is
    done by UrlMiners and the RecordingMiner.
    '''

    def __init__(self, config_file, thread_count=1, timeout=60, seed=''):
        '''Initializer,

        Arguments:
            config_file     the file containing configuration data.
            thread_count    the number of worker threads to use.
            timeout         the time workers should wait on request.
            seed            a website for Scrappy to crawl.

        The configuration file is only remembered here; it is read by
        load().
        '''
        self.config_file = config_file
        self.config = Config()
        self.thread_count = thread_count
        self.timeout = timeout
        self.visited_urls = SetWrapper(set())
        self.unvisited_urls = SetWrapper(set())
        self.targets = SetWrapper(set(), format="{},\n")
        self.verbose = False
        self.saverate = 6
        self.seed = seed

    def load(self):
        '''Load all configuration tokens and assign them.

        Reads the config file, takes the target, visited and unvisited
        file names from it, prints the settings, and fills the URL and
        target sets from those files when they exist. The seed, if any,
        is added to the unvisited set.

        Throws:
            Shouldn't throw any exceptions, but may fail to load a file if the
            file doesn't exist.
        '''
        print('\nLoading =>\n')
        self.config.load(self.config_file)
        self.target_file = self.config['target_file']
        self.visited_file = self.config['visited_file']
        self.unvisited_file = self.config['unvisited_file']

        print('\t\tConfig File: {}'.format(self.config_file))
        print('\t\tTarget File: {}'.format(self.target_file))
        print('\t\tUnvisited File: {}'.format(self.unvisited_file))
        print('\t\tVisited File: {}'.format(self.visited_file))
        print('\t\tTimeout: {}'.format(self.timeout))
        print('\t\tSave rate: {}'.format(self.saverate))
        print('\t\tThread count: {}'.format(self.thread_count))

        if self.seed:
            self.unvisited_urls.add(self.seed)

        # NOTE(review): lines are stored as read, including the trailing
        # newline -- confirm the miners expect that.
        if isfile(self.unvisited_file):
            with open(self.unvisited_file, 'r') as infile:
                for url in infile:
                    if url:
                        self.unvisited_urls.add(url)
            print('\n\t\tLoaded {} '
                  'unvisited sites.'.format(len(self.unvisited_urls.data)))

        if isfile(self.visited_file):
            with open(self.visited_file, 'r') as infile:
                for url in infile:
                    if url:
                        self.visited_urls.add(url)
            print('\t\tLoaded {} '
                  'visited sites.'.format(len(self.visited_urls.data)))

        if isfile(self.target_file):
            with open(self.target_file, 'r') as infile:
                for target in infile:
                    # -2, cut off new line and ,
                    self.targets.add(target[:len(target) - 2])
            print('\t\tLoaded {} '
                  'targets.'.format(len(self.targets.data)))

    def run(self):
        '''This is the main method which blocks the current thread's execution.

        The thread will block until the queue is empty. If the queue is never
        empty then the thread will never unblock. You can use a keyboard
        interrupt to save and end.

        Throws:
            Shouldn't throw anything... we hope.
        '''
        start_time = time()
        print('\nRunning =>\n')
        print('\t\tStarted at {}\n'.format(TimeStamp().now()))

        # Stays None until the recorder exists, so an interrupt that arrives
        # while the workers are still being started does not hit an unbound
        # name.
        recorder = None
        try:
            url_queue = Queue()
            for url in self.unvisited_urls.data:
                url_queue.put(url)

            for i in range(0, self.thread_count):
                thread = UrlMiner(url_queue, self.unvisited_urls,
                                  self.visited_urls, self.targets,
                                  self.timeout)
                thread.setDaemon(True)
                thread.verbose = self.verbose
                thread.start()

            recorder = RecordingMiner([[self.unvisited_urls, self.unvisited_file],
                                       [self.visited_urls, self.visited_file],
                                       [self.targets, self.target_file]],
                                      interval=self.saverate)
            recorder.setDaemon(True)
            recorder.start()

            # Poll until the workers have drained the queue.
            while not url_queue.empty():
                sleep(10)
            recorder.running = False
        except KeyboardInterrupt:
            if recorder is not None:
                recorder.running = False

        # Final save of everything collected so far.
        if recorder is not None:
            recorder.save()

        print('\nEnding =>\n'
              '\t\tElapsed time {0:.2f} '
              'minutes.'.format((time() - start_time) / 60))
        print('\t\tScraped {} sites.'.format(len(self.visited_urls.data)))
        print('\t\tUnvisited: '
              '{}\n\t\tVisited: {}'.format(len(self.unvisited_urls.data),
                                           len(self.visited_urls.data)))
        print('\t\tTargets: {}'.format(len(self.targets.data)))