def get_all(resource_group_name: str):
    """List the resources of one Azure resource group.

    Runs ``az resource list -g <resource_group_name>`` through
    ``common.shell_exec`` and decodes the command's stdout as JSON.

    Args:
        resource_group_name: Name of the resource group to query.

    Returns:
        The decoded JSON document printed by the command.
    """
    common.print_info(
        f"Fetching resources in ResourceGroup {resource_group_name}")
    listing = common.shell_exec(f"az resource list -g {resource_group_name}")
    return json.loads(listing.stdout)
def test_connections():
    '''Attempts a connection to each enrollment server.

    Every URI in config_site.ENROLLMENT_SERVER_URIS_ARRAY is tried up to ten
    times, with a ten second pause after each failed try.

    Returns True as soon as one server answers, False if none could be
    contacted.'''
    retry_count = 10
    for server_uri in config_site.ENROLLMENT_SERVER_URIS_ARRAY:
        common.print_info("Testing connection to server %s..." % server_uri)
        for i in range(retry_count):
            server_response = None
            try:
                # Certificate and hostname verification are deliberately
                # disabled: only reachability is being tested here.
                ssl_context = ssl.create_default_context()
                ssl_context.check_hostname = False
                ssl_context.verify_mode = ssl.CERT_NONE
                server_response = urllib.urlopen(server_uri, None, context=ssl_context)
            except IOError:
                pass
            if server_response:
                # Release the socket; the response body is never needed.
                try:
                    server_response.close()
                except IOError:
                    pass
                return True
            # Delay for next try:
            common.print_error("Could not contact %(server)s.  Delaying 10 seconds (try %(attempt)s of %(retries)s)." % {"server":server_uri,"attempt":str(i+1),"retries":str(retry_count)})
            time.sleep(10)
    return False
def read_client_identity():
    '''Loads the private key and certificate from the client identity PEM file.

    Returns None when the identity file does not exist. Otherwise returns a
    pair (key, cert); either member is None when that item could not be
    parsed from the file.'''
    identity_path = config_paths.CLIENT_IDENTITY_INSTALLED_FILE_PATH
    common.print_info("Loading identity file...")
    # Check for missing client identity:
    if not os.path.exists(identity_path):
        common.print_error("No client identity file found at %s." % identity_path)
        return None
    # Read the PEM text; the context manager closes the file even if the
    # read itself fails.
    with open(identity_path, 'r') as file_object:
        file_contents = file_object.read()
    # Certificate and key are parsed independently from the same PEM text.
    try:
        cert = crypto.load_certificate(crypto.FILETYPE_PEM, file_contents)
    except crypto.Error:
        common.print_error("Could not read the certificate from %s." % identity_path)
        cert = None
    try:
        key = crypto.load_privatekey(crypto.FILETYPE_PEM, file_contents)
    except crypto.Error:
        common.print_error("Could not read the private key from %s." % identity_path)
        key = None
    # Return PKI materials:
    return key, cert
def run(self):
    """Run ``unittest`` inside every ``test`` directory below the cwd.

    For each directory matched by ``**/test`` the process changes into it,
    collects every ``*.py`` file recursively and passes them to
    ``/usr/local/bin/python3 -m unittest``.

    Returns:
        A pair ``(exit_code, "")``; ``exit_code`` is ``common.FAILED`` when
        at least one directory's run did not return ``common.SUCCESS``,
        otherwise ``0``.
    """
    common.print_verbose("Running " + self.name + " action")
    exit_code = 0
    for test_dir in glob.iglob('**/test', recursive=True):
        original_working_directory = os.getcwd()
        run_directory = os.path.join(original_working_directory, str(test_dir))
        common.print_info("Running tests in " + str(run_directory))
        common.print_verbose("Changing directory to " + str(run_directory))
        os.chdir(run_directory)
        try:
            tests = list(glob.iglob('**/*.py', recursive=True))
            command = ['/usr/local/bin/python3', '-m', 'unittest']
            command.extend(tests)
            subprocess_exit_code, output = common.run_command(command)
            if subprocess_exit_code != common.SUCCESS:
                exit_code = common.FAILED
            common.print_verbose(output)
            common.continue_if_failed(subprocess_exit_code, output)
        finally:
            # Always go back, even when the run above raises; the outer
            # glob is relative to the original working directory.
            common.print_verbose("Changing directory to " + str(original_working_directory))
            os.chdir(original_working_directory)
    return exit_code, ""
def load_file(self, filepath, realpath):
    """Record a file's metadata on the instance ahead of an upload.

    Stores the paths, the SHA-1 hex digest of ``filepath``, the base name,
    the content hash from ``common.get_hash`` and the size in bytes, then
    builds ``part_info_list`` with one ``{'part_number': k}`` entry per
    ``self.chunk_size`` chunk (numbered from 1) and prints a summary.

    Args:
        filepath: Path string; only hashed and stored.
        realpath: Path that is actually read from disk.
    """
    self.filepath = filepath
    self.filepath_hash = sha1(filepath.encode('utf-8')).hexdigest()
    self.realpath = realpath
    self.filename = os.path.basename(realpath)
    print_info(
        '【{filename}】正在校检文件中,耗时与文件大小有关'.format(filename=self.filename))
    self.hash = common.get_hash(self.realpath)
    self.filesize = os.path.getsize(self.realpath)
    part_count = math.ceil(self.filesize / self.chunk_size)
    self.part_info_list = [
        {'part_number': number} for number in range(1, part_count + 1)
    ]
    summary = '''================================================= 文件名:{filename} hash:{hash} 文件大小:{filesize} 文件路径:{filepath} ================================================= '''.format(
        filename=self.filename,
        hash=self.hash,
        filesize=self.filesize,
        filepath=self.realpath,
    )
    print_info(summary)
def _get_job_rule(pool: happybase.ConnectionPool, job_name) -> crawler.CrawlJobCore:
    '''Fetch the crawl_job_core (crawl rule) stored in HBase for a job.

    Reads column ``rule_col`` of row ``rule_row_key`` from the table named
    ``job_name`` and deserialises it with ``crawler.CrawlJobCore.loads``.

    Returns the loaded rule, or None when anything goes wrong (the
    exception is reported through ``common.print_exception``).
    '''
    with pool.connection() as conn:
        conn: happybase.Connection
        try:
            stored_row = conn.table(job_name).row(rule_row_key, columns=[rule_col])
            rule = stored_row[bytes(rule_col, encoding="utf-8")].decode("utf-8")
            common.print_info("get crawl rule: {}".format(rule))
            # TODO: the key handling is slightly problematic (original note).
            return crawler.CrawlJobCore.loads(rule)
        except Exception as e:
            common.print_exception(e)
            return None
        finally:
            conn.close()  # close the connection
def filter_default_groups(groups):
    """Return the groups whose ``"name"`` starts with ``"Default"``.

    Args:
        groups: Iterable of mappings that each carry a ``"name"`` entry.

    Returns:
        A new list with the matching groups, in input order.
    """
    common.print_info("[Filter] Filtering Default ResourceGroups")
    return [
        candidate for candidate in groups
        if str.startswith(candidate["name"], "Default")
    ]
def exclude_region(groups, region: str):
    """Return the groups whose ``"location"`` differs from ``region``.

    Args:
        groups: Iterable of mappings that each carry a ``"location"`` entry.
        region: Location value to drop.

    Returns:
        A new list with the remaining groups, in input order.
    """
    common.print_info(f"[Filter] Excluding ResourceGroups by region: {region}")
    return [candidate for candidate in groups if candidate["location"] != region]
def save_fn(self, layer: int, crawl_job_core: CrawlJobCore, url: str,
            result_list: list):
    '''
    Persist or forward the results crawled for one URL.

    Does nothing (beyond logging) when the job is in CLOSE_SET, i.e. it was
    closed manually. On the last layer the results go to ``DB.save_results``;
    on any earlier layer every result must be a valid URL and each one is
    queued as its own single-URL task for the next layer.
    '''
    job_name = crawl_job_core.name
    if CLOSE_SET.is_member(job_name):
        common.print_info("this crawl_job has been closed: {}".format(job_name))
        return

    assert 0 <= layer < crawl_job_core.layer_cnt()

    # Last layer: final data, store it in the database.
    if layer == crawl_job_core.layer_cnt() - 1:
        if not DB.save_results(crawl_job_core, url, result_list):
            common.print_info("failed to save results")
        return

    # Intermediate layer: results may only be URLs. All of them are
    # validated before the first one is queued.
    for candidate in result_list:
        assert common.urltools.check_url(candidate)

    next_layer = layer + 1
    for next_url in result_list:
        # One URL per task, purely for convenience.
        QUEUE.put(CrawlTaskJson(job_name, next_layer, [next_url]).get_json())
def exclude_default_resource_groups(groups):
    """Return the groups whose ``"name"`` does not start with ``"Default"``.

    Args:
        groups: Iterable of mappings that each carry a ``"name"`` entry.

    Returns:
        A new list with the remaining groups, in input order.
    """
    common.print_info("[Filter] Excluding Default ResourceGroups")
    return [
        candidate for candidate in groups
        if not str.startswith(candidate["name"], "Default")
    ]
def run_task_fetcher(self):
    '''
    Fetch tasks from the queue (redis, per the original note) until
    ``self.end_flag`` is set, handing each task's URLs to ``self.add_urls``.

    Any exception raised while handling a task is reported through
    ``common.print_exception`` and the loop carries on.
    '''
    while not self.end_flag:
        try:
            fetched = QUEUE.get_wait(timeout=self.timeout)
            if fetched is None:
                # Nothing came back: the wait timed out.
                continue
            _key, payload = fetched
            task = CrawlTaskJson.from_json_str(payload)

            # A job listed in CLOSE_SET was closed manually; drop its tasks.
            if CLOSE_SET.is_member(task.job_name):
                common.print_info(
                    "this crawl_job has been closed: {}".format(
                        task.job_name))
                continue

            # Every URL has to be valid before the task is accepted.
            for task_url in task.urls:
                assert common.urltools.check_url(task_url)

            # Pause briefly so that other nodes get a chance to grab tasks.
            time.sleep(0.5)
            self.add_urls(task.job_name, task.layer, task.urls)
        except Exception as e:
            common.print_exception(e)
def main():
    """Delete the Azure resources of the configured region.

    Runs the start-up checks from ``init``, splits the region's resource
    groups into ``Default*`` (classic) and all others, deletes the latter
    in parallel, empties the former one by one and exits with status 0.
    """
    startup_steps = (
        init.parse_arguments,
        init.print_intro,
        init.check_python_version,
        init.check_azure_installation,
        init.check_azure_cli_version,
        init.check_if_logged,
    )
    for step in startup_steps:
        step()

    in_region = resource_groups.filter_by_region(
        resource_groups.get_all(), common.params['AZURE_REGION'])
    legacy = resource_groups.filter_default_groups(in_region)
    standard = resource_groups.exclude_default_resource_groups(in_region)

    # Delete resources created with new ResourceManager mode
    # https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-deployment-model
    resource_groups.delete_many_parallel(standard)

    # Delete resources created with classic (legacy) mode
    # TODO Consider: Delete Default-Storage-Group and recreate it empty
    for legacy_group in legacy:
        resource_groups.delete_all_resources(legacy_group)

    common.print_info("Done")
    exit(0)
def create(self, parent_file_id):
    """Create the upload task for the loaded file.

    Posts the file's metadata to the ``/v2/file/create`` endpoint and stores
    ``part_info_list``, ``file_id`` and ``upload_id`` from the response on
    the instance.

    When the access token is rejected, the request is retried through
    ``self.check_auth`` and the retry's result is returned. When
    ``DATA['OVERWRITE']`` is set and the response reports an existing file,
    that file is recycled and the creation is repeated.

    Args:
        parent_file_id: Id of the folder the file is created in.

    Returns:
        The decoded JSON response of the request that was finally used.
    """
    create_data = {
        "drive_id": self.drive_id,
        "part_info_list": self.part_info_list,
        "parent_file_id": parent_file_id,
        "name": self.filename,
        "type": "file",
        "check_name_mode": "auto_rename",
        "size": self.filesize,
        "content_hash": self.hash,
        "content_hash_name": 'sha1'
    }
    # Overwrite mode: ask the server to refuse a duplicate name instead of
    # renaming, so the existing file can be recycled below.
    if DATA['OVERWRITE']:
        create_data['check_name_mode'] = 'refuse'
    # NOTE(review): verify=False disables TLS certificate verification.
    request_post = requests.post(
        'https://api.aliyundrive.com/v2/file/create',
        data=json.dumps(create_data),
        headers=self.headers,
        verify=False)
    requests_post_json = request_post.json()
    if requests_post_json.get('code') == 'AccessTokenInvalid':
        # Return the retry's result. Falling through would overwrite the
        # ids stored by the successful retry with the empty values of this
        # rejected response.
        return self.check_auth(requests_post_json,
                               lambda: self.create(parent_file_id))
    # Overwrite mode: recycle the existing file, then create again.
    if DATA['OVERWRITE'] and requests_post_json.get('exist'):
        if self.recycle(requests_post_json.get('file_id')):
            print_info('【%s】原有文件回收成功' % self.filename)
            print_info('【%s】重新上传新文件中' % self.filename)
            return self.create(parent_file_id)

    self.part_upload_url_list = requests_post_json.get(
        'part_info_list', [])
    self.file_id = requests_post_json.get('file_id')
    self.upload_id = requests_post_json.get('upload_id')
    return requests_post_json
def complete(self, file_id, upload_id):
    """Tell the server that all parts of an upload have been sent.

    Posts to the ``/v2/file/complete`` endpoint. When the access token is
    rejected the token is refreshed and the call is retried; if the refresh
    fails the process exits.

    Args:
        file_id: Id of the file being uploaded.
        upload_id: Id of the upload task.

    Returns:
        True when the response contains ``file_id`` (upload succeeded),
        False otherwise. A retry after a token refresh returns the retry's
        result.
    """
    complete_data = {
        "drive_id": self.drive_id,
        "file_id": file_id,
        "upload_id": upload_id
    }
    # NOTE(review): verify=False disables TLS certificate verification.
    complete_post = requests.post(
        'https://api.aliyundrive.com/v2/file/complete', json.dumps(complete_data),
        headers=self.headers,
        verify=False
    )
    complete_post_json = complete_post.json()
    if complete_post_json.get('code') == 'AccessTokenInvalid':
        print_info('AccessToken已失效,尝试刷新AccessToken中')
        if self.token_refresh():
            print_info('AccessToken刷新成功,返回创建上传任务')
            # Propagate the retry's True/False; a bare `return` would report
            # None (falsy) to the caller even when the retry succeeded.
            return self.complete(file_id, upload_id)
        print_error('无法刷新AccessToken,准备退出')
        exit()
    s = time.time() - self.start_time
    if 'file_id' in complete_post_json:
        print_success('【{filename}】上传成功!消耗{s}秒'.format(filename=self.filename, s=s))
        return True
    else:
        print_warn('【{filename}】上传失败!消耗{s}秒'.format(filename=self.filename, s=s))
        return False
def create(self, parent_file_id):
    """Create the upload task for the loaded file.

    Posts the file's metadata to the ``/v2/file/create`` endpoint. When the
    access token is rejected the token is refreshed and the request is
    repeated; if the refresh fails the process exits.

    Args:
        parent_file_id: Id of the folder the file is created in.

    Returns:
        The decoded JSON response.
    """
    payload = json.dumps({
        "auto_rename": True,
        "content_hash": self.hash,
        "content_hash_name": 'sha1',
        "drive_id": self.drive_id,
        "hidden": False,
        "name": self.filename,
        "parent_file_id": parent_file_id,
        "type": "file",
        "size": self.filesize
    })
    response_json = requests.post(
        'https://api.aliyundrive.com/v2/file/create',
        data=payload,
        headers=self.headers,
        verify=False
    ).json()

    if response_json.get('code') != 'AccessTokenInvalid':
        return response_json

    print_info('AccessToken已失效,尝试刷新AccessToken中')
    if self.token_refresh():
        print_info('AccessToken刷新成功,返回创建上传任务')
        return self.create(parent_file_id)
    print_error('无法刷新AccessToken,准备退出')
    exit()
    # Only reached if exit() returns instead of raising.
    return response_json
def main():
    """Sign on to the DCSO Portal API and print token and permissions.

    The API endpoint is taken from the first command line argument.
    Username and password are prompted for interactively. After a
    successful sign-on the user token, the user's service permissions and
    the organization short code are printed.
    """
    # A missing *or empty* first argument is rejected here. Previously an
    # empty string left `url` unbound and crashed with a NameError below.
    url = sys.argv[1] if len(sys.argv) > 1 else ""
    if not url:
        print_error(
            "first command line argument must be DCSO Portal API endpoint")
        sys.exit(1)

    client = APIClient(api_url=url)
    if not client.is_alive():
        print_error(f"failed using API {url}", exit=1)
    else:
        print_info(f"API {url} ready.")

    try:
        username = prompt_input(
            prompter("Username", symbol='\N{BUST IN SILHOUETTE}'))
        password = prompt_input(prompter("Password", symbol='\N{KEY}'),
                                echo=False)
    except KeyboardInterrupt:
        print()
        print_info("Authentication cancelled")
        sys.exit(0)

    if not (username and password):
        print_error("need both username and password", exit=1)
        return

    try:
        auth = sign_on(client, username, password)
    except PortalException as exc:
        print_error(str(exc), exit=1)
        # Same guard as above: never continue without `auth`.
        return
    else:
        print(
            f"Your User Token expires {auth.token.expires}:\n{auth.token.token}"
        )
        if auth.totp_activated:
            print(f"TOTP Activated on: {auth.totp_activated.strftime('%c')}")

    client.token = auth.token.token

    try:
        perms = client.auth.user_service_permissions()
    except PortalException as exc:
        print_error(str(exc))
    else:
        print("\nYour Permissions")
        print("-------------------")
        for perm in perms:
            print(f"{perm.service}: {perm.slug}")
        # Kept on the success path: `perms` does not exist when the lookup
        # above failed.
        print(f"\nAccess as TDH Coordinator: {perms.have('tdh-access-admin')}")

    # graphql_execute returns named tuples
    response = client.execute_graphql(
        query='{ auth_user { id organization { shortCode } } }')
    print(
        f"Organization ShortCode: {response.auth_user.organization.shortCode}")
def delete_all_resources(group):
    """Delete every resource of one resource group, one after another.

    Args:
        group: Mapping describing the resource group; its ``"name"`` entry
            is used to look up the resources.
    """
    group_name = group["name"]
    resource_list = resources.get_all(group_name)
    common.print_info(f"Delete resources in group {group_name} started")
    for res in resource_list:
        resources.delete(res)
    common.print_info(f"Delete resources in group {group_name} finished")
def check_auth(self, response_json, func):
    """Retry a request after refreshing a rejected access token.

    Args:
        response_json: Decoded JSON response of the request just made.
        func: Zero-argument callable that repeats the request.

    Returns:
        None when the response's ``code`` is not ``'AccessTokenInvalid'``;
        otherwise the result of ``func()`` once the token was refreshed.
        Exits the process when the refresh fails.
    """
    if response_json.get('code') != 'AccessTokenInvalid':
        return None
    print_info('AccessToken已失效,尝试刷新AccessToken中')
    if self.token_refresh():
        print_info('AccessToken刷新成功,返回创建上传任务')
        return func()
    print_error('无法刷新AccessToken,准备退出')
    sys.exit()
def test_PDC_general_nodejs():
    """Check the dependency modules reported for nodejs stream 8.

    The result of ``PDCParserGeneral("nodejs", "8").generateDepModules()``
    must contain ``platform``, ``host``, ``python2`` and ``python3``.
    """
    print_info(sys._getframe().f_code.co_name)
    deps = PDCParserGeneral("nodejs", "8").generateDepModules()
    print_info(deps)
    for expected_module in ('platform', 'host', 'python2', 'python3'):
        assert expected_module in deps
def get_tag(self, name):
    """Tell whether the help markdown contains the heading for ``name``.

    The heading looked for is ``'# <name>'`` converted to upper case; it
    matches when it occurs inside any entry of ``self.help_md``.

    Returns:
        True when the heading is found, False when it is not or when
        ``self.help_md`` is empty or missing.
    """
    heading = '# %s' % name
    if not self.help_md:
        print_info("help md does not exist.")
        return False
    wanted = heading.upper()
    return any(wanted in entry for entry in self.help_md)
def compile_domains():
    """Compile the main and unit-test scopes of every configured domain.

    For each entry of ``common.domains`` the dependencies of a scope are
    made ready first, then that scope is compiled: ``main`` before
    ``unit-tests``.
    """
    common.print_info("Domains: " + ", ".join(common.domains))
    for domain in common.domains:
        dependencies.ensure_ready(domain, "main")
        # Call form instead of the Python 2 print statement: with a single
        # argument it prints the same text on Python 2 and Python 3.
        print("Compiling " + domain)
        compile_by_domain_and_scope(domain, "main")
        dependencies.ensure_ready(domain, "unit-tests")
        compile_by_domain_and_scope(domain, "unit-tests")
def test_PDC_ODCS_nodejs():
    """Print the ODCS repo for nodejs stream 8 when ODCS auth is available."""
    print_info(sys._getframe().f_code.co_name)
    parser = PDCParserODCS("nodejs", "8")
    # TODO: need to setup MTF_ODCS variable with odcs token, and ODCS version at least 0.1.2
    # or your user will be asked to for token interactively
    if not get_odcs_auth():
        return
    print_info(parser.get_repo())
def networksetup_detect_network_hardware():
    '''Detects network hardware via /usr/sbin/networksetup.

    Returns True when the command succeeded (after a ten second pause),
    False when it exited with a non-zero status.'''
    command = ['/usr/sbin/networksetup', '-detectnewhardware']
    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError:
        common.print_error("Error while running networksetup to detect network hardware.")
        return False
    else:
        common.print_info("Ran networksetup to detect network hardware.")
        time.sleep(10)  # Let the system get IP addresses...
        return True
def ensure_ready(domain, scope):
    """Fetch every non-local dependency of a domain/scope pair.

    Loads the dependency versions, looks up the dependencies of
    ``domain``/``scope`` and calls ``recursive_fetch`` on each one whose
    version is not ``"local"``.

    Args:
        domain: Domain name.
        scope: Scope name within the domain.
    """
    load_dependency_versions()
    deps = dependencies(domain, scope)
    if deps:
        common.print_info("Checking dependencies for " + domain + ": ")
    # items() works on both Python 2 and Python 3; iteritems() is
    # Python 2 only.
    for dependency, version in deps.items():
        if version == "local":
            continue
        # Keys have the form "<group_id>,<artifact_id>".
        (group_id, artifact_id) = dependency.split(",")
        d = Dependency(domain, scope, group_id, artifact_id, version)
        d.recursive_fetch()
def ntpdate(given_server):
    '''Updates clock via NTP.

    Runs /usr/sbin/ntpdate -u against the given server. Returns True when
    the command succeeded, False when it exited with a non-zero status.'''
    command = ['/usr/sbin/ntpdate', '-u', given_server]
    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError:
        common.print_error("Error while running ntpdate to update the clock.")
        return False
    else:
        common.print_info("Ran ntpdate to update the clock: %s" % given_server)
        return True
def systemsetup_set_time_zone(given_locale):
    '''Sets the time zone to the given locale using /usr/sbin/systemsetup.

    Returns True when the command succeeded, False when it exited with a
    non-zero status.'''
    command = ['/usr/sbin/systemsetup', '-settimezone', given_locale]
    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError:
        common.print_error("Error while running systemsetup to set the time zone.")
        return False
    else:
        common.print_info("Ran systemsetup to set the time zone: %s" % given_locale)
        return True
def work(self, driver, url, layer: int):
    '''
    Run one crawl task: load ``url`` and extract the results for ``layer``.

    driver : selenium WebDriver used to control the browser
    url    : the URL to crawl
    layer  : the layer (index into the job's rules) being crawled

    Without selectors the whole page source is the content; otherwise the
    layer's selectors are applied in sequence, starting from the driver,
    and the ``outerHTML`` of every selected element is used. When the
    layer's rule has a ``reg`` pattern, its ``findall`` matches become the
    results, otherwise the content itself does. The results are handed to
    ``self.save_fn``.

    Raises AssertionError for an invalid url or layer, or when a non-final
    layer produces a result that is not a valid URL.
    '''
    # Validate the url and the layer first.
    assert common.urltools.check_url(url)
    assert isinstance(
        layer, int) and layer >= 0 and layer < self.core.layer_cnt()

    selectors = self.selectors[layer]
    reg = self.core.rules[layer]["reg"]

    # Log the start of the task.
    common.print_info("[Task start: layer {}] --> CrawlJob({}): {}".format(
        layer, self.core.name, url))
    driver.get(url)

    result_list = []
    if len(selectors) <= 0:
        content = driver.page_source
        if reg is not None:
            result_list.extend(reg.findall(content))
        else:
            result_list = [content]
    else:
        target = driver
        for selector in selectors:
            target = selector.select(target)
        for ele in target:
            content = ele.get_attribute("outerHTML")
            if reg is not None:
                result_list.extend(reg.findall(content))
            else:
                result_list.append(content)

    # On any layer but the last, every result must be a valid URL. The loop
    # variable has its own name so that `url` keeps referring to the page
    # crawled by this task in the calls below.
    if layer != self.core.layer_cnt() - 1:
        for result_url in result_list:
            assert common.urltools.check_url(result_url)

    # Hand the results over.
    self.save_fn(layer=layer,
                 crawl_job_core=self.core,
                 url=url,
                 result_list=result_list)

    # Log the end of the task.
    common.print_info("[Task done: layer {}] ==> CrawlJob({}): {}".format(
        layer, self.core.name, url))
def defaults_delete(given_key, given_plist):
    '''Deletes given key from given plist by calling /usr/bin/defaults.
    Useful for binary plists.

    Returns True when the command succeeded, False when it exited with a
    non-zero status.'''
    names = {"k":given_key,"p":given_plist}
    try:
        subprocess.check_call(['/usr/bin/defaults', 'delete', given_plist, given_key])
    except subprocess.CalledProcessError:
        common.print_error("Error clearing key %(k)s from %(p)s." % names)
        return False
    else:
        common.print_info("Deleted %(k)s from %(p)s." % names)
        return True
def process_response_as_xml(given_server_response):
    '''Retrieves the main dictionary from given response.

    The response body is parsed as an XML property list. If the response is
    empty or cannot be parsed, a blank dictionary is used.

    Returns the resulting dictionary.'''
    # NOTE(review): the original description also promised that "some
    # essential keys are set in all cases"; no such code is visible in this
    # block - confirm whether it lives elsewhere.
    # Default:
    response_dict = {}
    # If response is not None:
    if given_server_response:
        common.print_info("Processing response for XML content.")
        try:
            response_dict = plistlib.readPlistFromString(given_server_response.read())
            common.print_info("Response is a valid XML property list.")
        # Tuple form: catches both types. The old `except A, B:` spelling
        # caught only A and bound the exception to the name B.
        except (xml.parsers.expat.ExpatError, NameError):
            common.print_error("Response is not an XML property list!")
    # The visible code built the dictionary but never handed it back.
    return response_dict
def caffeinate_system():
    '''Prohibits system sleep by starting /usr/bin/caffeinate.

    The process is started in the background and not waited for.

    Returns True when the process could be started, False otherwise.'''
    try:
        subprocess.Popen(['/usr/bin/caffeinate', '-d', '-i', '-m', '-s'])
    # Popen reports a launch failure (missing or non-executable binary) as
    # OSError; CalledProcessError is only raised by the check_* helpers and
    # is kept for backward compatibility.
    except (OSError, subprocess.CalledProcessError):
        common.print_error("Could not start caffeination.")
        return False
    common.print_info("Started caffeination.")
    return True
def fetch(self):
    """Download this dependency's files unless it is already the latest.

    Prints the coordinates, then either reports ``exists`` or force-fetches
    the ``jar``, ``pom``, ``pom.asc`` and ``jar.asc`` files (in that order)
    and reports ``downloaded``.
    """
    coordinates = " " + self.group_id + "," + self.artifact_id + ": " + self.version + "... "
    common.print_info_no_eol(coordinates)
    common.print_verbose('')
    common.print_verbose('URL: ' + self.remotelocation("jar"))
    common.print_verbose('Local: ' + self.locallocation("jar"))

    if self.islatest():
        common.print_info("exists")
        return

    for extension in ("jar", "pom", "pom.asc", "jar.asc"):
        self.forcefetch(extension)
    common.print_info("downloaded")
def load_file(self, filepath, realpath):
    """Record a file's metadata on the instance ahead of an upload.

    Starts the timer (``self.start_time``), stores the paths, the base
    name, the content hash from ``get_hash`` and the size in bytes, and
    prints a summary.

    Args:
        filepath: Path string; only stored.
        realpath: Path that is actually read from disk.
    """
    self.start_time = time.time()
    self.filepath, self.realpath = filepath, realpath
    self.filename = os.path.basename(realpath)
    print_info('【{filename}】正在校检文件中,耗时与文件大小有关'.format(filename=self.filename))
    self.hash = get_hash(self.realpath)
    self.filesize = os.path.getsize(self.realpath)
    summary = '''================================================= 文件名:{filename} hash:{hash} 文件大小:{filesize} 文件路径:{filepath} ================================================= '''.format(
        filename=self.filename,
        hash=self.hash,
        filesize=self.filesize,
        filepath=self.realpath,
    )
    print_info(summary)
def templateTest(self, testname, testlines, method="run"):
    """
    Defines multiline Bash snippet tests part of the ``tests/generated.py`` file.

    Appends a ``test_<testname>`` method header to ``self.output``, followed
    by one ``self.<method>(...)`` call per entry of ``testlines``.

    testname  : suffix of the generated test method's name
    testlines : iterable of snippets, one generated call each
    method    : name of the runner method to call; ``shell=True`` is
                generated only for ``"runHost"``
    """
    # NOTE(review): the template literals below are emitted verbatim into
    # the generated file, so their inner whitespace is significant. It looks
    # collapsed in this view - confirm against the generated output before
    # touching them.
    self.output = self.output + """ def test_%s(self): self.start() """ % testname
    for line in testlines:
        # only use shell=True for runHost() calls, otherwise variables etc.
        # get expanded too early, i.e. on the host
        self.output = self.output + \
            ' self.%s(""" %s """, shell=%r)\n' % (
                method, line, method == "runHost")
    print_info("Added test (runmethod: %s): %s" % (method, testname))
def add_urls(self, crawl_job_name: str, layer: int, urls: Iterable):
    """Hand URLs of a crawl job to the crawler.

    Does nothing (beyond logging) when the job is in CLOSE_SET, i.e. it was
    closed manually. The job's rule is taken from the LRU cache, falling
    back to the database and caching the result.

    Raises:
        CrawlJobException: When the job is neither cached nor in the
            database.
    """
    if CLOSE_SET.is_member(crawl_job_name):
        common.print_info(
            "this crawl_job has been closed: {}".format(crawl_job_name))
        return

    cached_core = self.job_lru_cache.get(crawl_job_name)
    job_core: CrawlJobCore = cached_core
    if cached_core is None:
        # Cache miss: load the rule from the database and remember it.
        job_core = DB.get_job_rule(crawl_job_name)
        if job_core is None:
            raise CrawlJobException(
                "No such crawl job: {}".format(crawl_job_name))
        self.job_lru_cache.put(crawl_job_name, job_core)

    self.C.add_urls(job_core, layer, urls)
def parse(self, pom_location):
    """Collect the direct dependencies declared in a POM file.

    Every ``<dependency>`` directly below a ``<dependencies>`` element is
    recorded in ``self.direct_dependencies`` under the key
    ``"<groupId>,<artifactId>"`` with its version text as value.
    ``<dependencies>`` elements located inside a ``<plugins>`` element are
    skipped.

    Args:
        pom_location: Path (or file object) of the POM to parse.
    """
    ns = "{http://maven.apache.org/POM/4.0.0}"
    pom = xml.parse(pom_location)

    # Dependencies nested anywhere below a <plugins> element belong to
    # build plugins. They are collected up front because a flat iter() has
    # no "end of element" event, so a flag set on entering <plugins> could
    # never be cleared and every later <dependencies> would be dropped.
    plugin_dependencies = [
        nested
        for plugins in pom.iter(ns + "plugins")
        for nested in plugins.iter(ns + "dependencies")
    ]

    for elem in pom.iter(ns + "dependencies"):
        if any(elem is nested for nested in plugin_dependencies):
            continue
        for dep in elem.findall(ns + "dependency"):
            group_id = dep.find(ns + "groupId").text
            artifact_id = dep.find(ns + "artifactId").text
            version = get_text(dep.find(ns + "version"))
            self.direct_dependencies[group_id + "," + artifact_id] = version
            common.print_info(" " + group_id + ", " + artifact_id + ", " + version)
def run_tests_for(domain, scope):
    """Run the JUnit test classes of a domain/scope pair and report.

    Prints `` PASSED.`` when the ``java ... org.junit.runner.JUnitCore``
    command exits with 0, `` FAILED.`` otherwise, and `` No tests found.``
    when there are no test classes.
    """
    common.print_info_no_eol("Running " + scope + " for " + domain + "...")
    classpath = dependencies.classpath_for(domain, scope)
    test_classes_as_string = test_classes_for(domain, scope)

    if test_classes_as_string.strip() == "":
        common.print_info(" No tests found.")
        return

    run_tests_command = "java -cp " + classpath + " org.junit.runner.JUnitCore " + test_classes_as_string
    common.print_verbose("Running tests with:")
    common.print_verbose(run_tests_command)
    exit_code, _output = common.run_command(run_tests_command)
    verdict = " PASSED." if exit_code == 0 else " FAILED."
    common.print_info(verdict)
def show_dependencies():
    """Print every entry of ``dependency_versions`` as ``name: version``."""
    common.print_info("Dependency Versions:")
    # items() works on both Python 2 and Python 3; iteritems() is
    # Python 2 only. The module-level mapping is only read, so no `global`
    # statement is needed.
    for dependency, version in dependency_versions.items():
        common.print_info(dependency + ": " + version)
def test_print_info(self):
    """print_info at log level "info" forwards 'INFO: <text>' to print_raw."""
    common.set_log_level("info")
    # Swap print_raw for a mock only for the duration of this test, so the
    # replacement cannot leak into tests that run afterwards.
    original_print_raw = common.print_raw
    common.print_raw = MagicMock()
    try:
        s = 'hello world'
        common.print_info(s)
        common.print_raw.assert_called_with('INFO: hello world')
    finally:
        common.print_raw = original_print_raw