def cppcheck(files):
    abort = False
    source_patterns = common.get_option('cppcheck-hook.source-patterns',
                                        default='*.cpp *.cxx *.c').split()
    header_patterns = common.get_option('cppcheck-hook.header-patterns',
                                        default='*.hpp *.hxx *.h').split()
    code_patterns = source_patterns + header_patterns
    global CPPCHECK_PATH
    if common.g_cppcheck_path_arg is not None and len(
            common.g_cppcheck_path_arg) > 0:
        CPPCHECK_PATH = common.g_cppcheck_path_arg
    else:
        CPPCHECK_PATH = common.get_option('cppcheck-hook.cppcheck-path',
                                          default=CPPCHECK_PATH,
                                          type='--path').strip()
    if check_cppcheck_install():
        common.error('Failed to launch cppcheck.\n')
        return True
    repoRoot = common.get_repo_root()
    for f in files:
        if any(fnmatch(f.path.lower(), p) for p in code_patterns):
            if not common.binary(f.contents):
                file = os.path.join(repoRoot, f.path)
                abort = check_file(file) or abort
    return abort
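# check_cppcheck_install is not shown here; a minimal sketch of what it
# might look like, modeled on the uncrustify check in the codingstyle hook
# below (an assumption, not the hook's actual code) -- a non-zero status
# means cppcheck could not be launched:
def check_cppcheck_install():
    return common.execute_command(CPPCHECK_PATH + ' --version').status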
def filesize(files):
    abort = False
    limit = int(common.get_option('filesize-hook.max-size', default=1024**2))
    check_all_files = common.get_option('filesize-hook.type',
                                        "all").strip().lower() != "binary"
    too_big_files = []
    common.note('Checking file sizes...')
    count = 0
    for f in files:
        check_file = check_all_files or common.binary(f.contents)
        if check_file:
            common.trace('Checking ' + str(f.path) + ' size...')
            count += 1
            if f.size > limit:
                too_big_files.append(f)
    common.note('%d file(s) checked.' % count)
    if too_big_files:
        common.error(WARNING % limit)
        for f in too_big_files:
            common.error(FILEWARN % (f.path, f.size, limit))
        abort = True
    return abort
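# Hypothetical illustration of the file interface the hook relies on
# (only f.path, f.size and f.contents are used above; the Blob class is
# made up for this example):
#
#   class Blob:
#       def __init__(self, path, contents):
#           self.path, self.contents, self.size = path, contents, len(contents)
#
#   filesize([Blob('big.bin', b'\0' * (2 * 1024**2))])  # -> True (aborts)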
def __init__(self, cur):
    PathBuilder.__init__(self, cur,
                         int(get_option('path_cache_high_mark', "2000")),
                         int(get_option('path_cache_low_mark', "1000")))
    print("resetting yields...")
    self.cur.execute("""update nodes set yield=0""")
def __init__(self, cur):
    VolumeHolder.__init__(self)
    CursorWrapper.__init__(self, cur)
    inst_name = get_option("instance", None)
    self.inst_id = get_instance_id(cur, inst_name)
    self.alt_repre = get_option("storage_alternative", None)
    self.mime_type = get_option("server_mime_type", None)
    self.check_locality = get_option("server_check_locality", True)
def _do_serve(self, bridge, url_id, headers_flag):
    volume_id = bridge.get_volume_id(url_id)
    if headers_flag:
        sz = bridge.get_headers_size(url_id, volume_id)
        ct = "text/plain"
    else:
        sz = bridge.get_body_size_ex(url_id, volume_id)
        ct = bridge.get_content_type(url_id, volume_id)

    compress_threshold = int(get_option("compress_threshold", "100"))

    # chunked encoding would be better than not compressing large
    # files, but it's too complicated, and they probably don't
    # exist anyway...
    no_compress_threshold = int(
        get_option("no_compress_threshold", "100000"))

    reader = None
    if sz:
        if headers_flag:
            reader = bridge.open_headers(url_id, volume_id)
        else:
            reader = bridge.open_page_ex(url_id, volume_id)

    if reader is None:
        # headers are optional (e.g. drive.py doesn't store
        # them); for bodies, assuming redirect
        self.send_error(204, "No content")
        return

    try:
        assert sz
        compressed = None
        if (sz >= compress_threshold) and (sz <= no_compress_threshold) and (
                'accept-encoding' in self.headers) and (
                'gzip' in self.headers['accept-encoding']):
            compressed = self._encode_content(reader)

        self.send_response(200)
        self.send_header('Content-Type', ct)
        if compressed:
            self.send_header('Content-Encoding', 'gzip')
            self.send_header('Content-Length', len(compressed))
        else:
            self.send_header('Content-Length', sz)

        self.end_headers()

        if compressed:
            self.wfile.write(compressed)
            self.wfile.flush()
        else:
            shutil.copyfileobj(reader, self.wfile)
    finally:
        reader.close()
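# _encode_content is not shown in this snippet; a minimal sketch of what it
# could look like, assuming it gzip-compresses the whole reader into memory
# and returns the bytes whose length is then sent as Content-Length:
def _encode_content(self, reader):
    buf = BytesIO()
    # GzipFile wraps the in-memory buffer as a compressing write target
    with gzip.GzipFile(fileobj=buf, mode='wb') as zipped:
        shutil.copyfileobj(reader, zipped)
    return buf.getvalue()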
def __init__(self, cur):
    VolumeHolder.__init__(self)
    HostCheck.__init__(self, cur)
    self.mem_cache = MemCache(
        int(get_option('parse_cache_high_mark', "2000")),
        int(get_option('parse_cache_low_mark', "1000")))
    top_protocols = get_option('top_protocols', 'http https')
    self.protocols = set(re.split('\\s+', top_protocols))
def __init__(self, single_action, conn, cur):
    DownloadBase.__init__(self, conn, cur, single_action)
    self.br = None
    self.notification_threshold = int(
        get_option('drive_notification_threshold', "500"))
    self.drive_headless = get_option('drive_headless', False)
    self.socks_proxy_host = get_option('socks_proxy_host', None)
    self.socks_proxy_port = int(get_option('socks_proxy_port', "0"))
    self.download_dir = os.path.join(get_parent_directory(), "down")
    if not os.path.exists(self.download_dir):
        os.makedirs(self.download_dir)
def __init__(self, config):
    self._host = get_option('mongo', 'host', config)
    self._port = int(get_option('mongo', 'port', config))
    self._server = '%s:%s' % (self._host, self._port)
    try:
        self.conn = pymongo.MongoClient(self._server)
    except Exception as err:
        error('failed to connect to Mongo instance: %s' % str(err))
        raise err
    self.db = self.conn['omnibus']
def __init__(self, config):
    self.host = get_option('redis', 'host', config)
    self.port = int(get_option('redis', 'port', config))
    self.database = int(get_option('redis', 'db', config))
    self.ttl = 999999
    try:
        self.db = Redis(db=self.database, host=self.host, port=self.port,
                        socket_timeout=None)
    except Exception:
        self.db = None
def __init__(self, cur):
    self.cur = cur
    self.volume_id = None
    self.member_count = 0
    self.zip_front = None
    self.zip_back = None
    self.volume_threshold = int(
        get_option('volume_threshold', str(1024 * 1024 * 1024)))
    self.compress_backoff = int(get_option('compress_backoff', str(3600)))
    inst_name = get_option("instance", None)
    self.inst_id = get_instance_id(cur, inst_name)
def forbidtoken(files, config_name):
    include_patterns = common.get_option('forbidtoken-hook.' + config_name,
                                         default=tr[config_name][2]).split()
    common.note('Checking for "' + config_name + '" tokens on ' +
                ', '.join(include_patterns) + ' files')
    abort = False
    token = tr[config_name][0]
    line_iter = lambda x: enumerate(re.finditer(".*\n", x, re.MULTILINE), 1)
    line_match = lambda test, x: (n for n, m in line_iter(x) if test(m.group()))
    count = 0
    for f in files:
        if not any(f.fnmatch(p) for p in include_patterns):
            continue
        common.trace('Checking ' + str(f.path) + '...')
        content = f.contents
        if not common.binary(content) and token(content.decode()):
            if not abort:
                common.error(WARNING % (tr[config_name][1]))
            for n in line_match(token, content.decode()):
                common.error(FILEWARN % (f.path, n))
            abort = True
        count += 1
    if abort:
        common.error('Hook "' + config_name + '" failed.')
    common.note('%d file(s) checked.' % count)
    return abort
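# The tr table above is referenced but not defined in this snippet. A
# hypothetical entry showing the shape the hook expects -- index 0 a
# predicate over the decoded file text, index 1 the text spliced into
# WARNING, index 2 the default include patterns:
#
#   tr = {
#       'crlf': (lambda text: '\r\n' in text,
#                'CRLF line endings',
#                '*.cpp *.hpp *.txt'),
#   }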
def main():
    try:
        raw_port = get_option('server_port', "8888")
        server = ThreadingHTTPServer(('', int(raw_port)), StorageHandler)
        server.serve_forever()
    except KeyboardInterrupt:
        print("shutting down on keyboard interrupt", file=sys.stderr)
        server.socket.close()
        the_pool.closeall()
def _serve_root(self):
    inst_name = get_option("instance", "")
    config = """[root]
instance=%s
""" % (inst_name,)
    body = config.encode("utf-8")
    self.send_response(200)
    self.send_header('Content-Type', "text/plain; charset=utf-8")
    self.send_header('Content-Length', len(body))
    self.end_headers()
    self.wfile.write(body)
def __init__(self, artifact):
    self.artifact = artifact
    self.artifact['data']['yara'] = None
    cfg_rules = get_option('modules', 'yara_path', CONF)
    if cfg_rules == '':
        raise TypeError(
            'Invalid YARA rules directory in conf file: Cannot be empty!')
    self.rules = os.path.join(CONF, os.path.abspath(cfg_rules))
    if list_dir(self.rules) == 0:
        raise TypeError(
            'Invalid YARA rules directory in conf file: No files contained in directory!')
def __init__(self, cur, inst_name=None):
    CursorWrapper.__init__(self, cur)
    self.own_max_num_conn = int(get_option('own_max_num_conn', "4"))
    self.healthcheck_interval = int(get_option("healthcheck_interval", "100"))
    self.healthcheck_threshold = int(get_option("healthcheck_threshold", "80"))
    if self.healthcheck_threshold <= 0:
        # disabled
        self.healthcheck_interval = 0
    self.target_queue = []  # of TargetBase descendants
    self.progressing = []  # of URL IDs
    self.volume_progressing = []  # of volume IDs
    self.total_checked = 0
    self.total_processed = 0
    self.total_error = 0
    server_name = get_mandatory_option('server_name')
    raw_port = get_option('server_port', "8888")
    self.endpoint_root = "http://%s:%d" % (server_name, int(raw_port))
    inst_name = get_option("instance", None)
    self.inst_id = get_instance_id(cur, inst_name)
    self.remote_inst_id = None  # None => not initialized, "" => remote doesn't have instance
def __init__(self, conn, cur, single_action):
    # download (as opposed to parsing) host check is restricted by instance
    HostCheck.__init__(self, cur, get_option("instance", None))
    self.conn = conn
    self.single_action = single_action
    self.host_id = 0
    self.max_host_id = self.get_max_host()
    self.notification_relay = get_option("notification_relay", None)
    # according to the HTTP spec, Retry-After can also carry an absolute
    # time value, but that hasn't been seen yet
    self.relative_rx = re.compile("^([0-9]{1,3})$")
    self.holds = {}  # host_id -> int time in secs
    self.last_expiration = int(time.time() + 0.5)
    self.counter = 0
    self.healthcheck_interval = int(
        get_option("healthcheck_interval", "100"))
    self.healthcheck_tail = int(get_option("healthcheck_tail", "100"))
    self.healthcheck_threshold = int(
        get_option("healthcheck_threshold", "80"))
    if (self.healthcheck_tail <= 0) or (self.healthcheck_threshold <= 0):
        # disabled
        self.healthcheck_interval = 0
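# Illustration of the Retry-After handling above: the regex only accepts
# short relative values, so e.g.
#
#   self.relative_rx.match("120")  # matches -> hold the host for 120 seconds
#   self.relative_rx.match("Fri, 01 Jan 2100 00:00:00 GMT")  # None -> ignored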
def main():
    top_protocols = get_option('top_protocols', 'http')
    protocols = re.split('\\s+', top_protocols)
    conn = make_connection()
    try:
        with conn.cursor() as cur:
            # maybe check here whether download and/or parse is running? it shouldn't...
            act_reset(cur)
            seeder = Seeder(cur)
            for a in sys.argv[1:]:
                if a.startswith('http'):
                    pr = urlparse(a)
                    seeder.add_host(pr.hostname)
                    seeder.add_url(a)
                else:
                    seeder.add_host(a)
                    for protocol in protocols:
                        seeder.add_url("%s://%s" % (protocol, a))
            seeder.cond_add_instance()  # for seeding w/o arguments
            seeder.seed_queue()
    finally:
        conn.close()
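# Hypothetical invocation (the script name is an assumption): full URLs are
# seeded as given, bare hostnames are expanded with each configured protocol.
#
#   python3 seed.py example.com https://example.org/index.html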
def __init__(self, cur):
    self.cur = cur
    self.canon = make_canonicalizer()
    self.inst_name = get_option("instance", None)
    self.inst_id = None
def __init__(self, single_action, conn, cur):
    DownloadBase.__init__(self, conn, cur, single_action)
    self.max_num_conn = int(get_option('max_num_conn', "10"))
    self.notification_threshold = int(
        get_option('download_notification_threshold', "1000"))
    self.accept_compressed = get_option('accept_compressed', True)
    self.force_ipv6 = get_option('force_ipv6', None)
    self.user_agent = get_option('user_agent', None)
    self.extra_header = get_option('extra_header', None)
    self.socks_proxy_host = get_option('socks_proxy_host', None)
    self.socks_proxy_port = int(get_option('socks_proxy_port', "0"))
    self.http_proxy_host = get_option('http_proxy_host', None)
    self.http_proxy_port = int(get_option('http_proxy_port', "0"))
    if self.socks_proxy_host and self.http_proxy_host:
        raise Exception("more than one proxy set")
    retry_after_default = get_option('retry_after_default', None)
    self.retry_after_default = None if retry_after_default is None else int(retry_after_default)
    self.mime_whitelist = {'text/html'}
    mime_whitelist = get_option('mime_whitelist', None)
    if mime_whitelist:
        if mime_whitelist == "*":
            self.mime_whitelist = set()
        else:
            self.mime_whitelist.update(mime_whitelist.split())
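# A hypothetical config fragment matching the options read above ("*"
# empties the whitelist, i.e. accepts any MIME type; a space-separated
# list extends the built-in {'text/html'}):
#
#   max_num_conn=10
#   mime_whitelist=text/html application/xhtml+xml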
def __init__(self, cur):
    CursorWrapper.__init__(self, cur)
    inst_name = get_option("instance", None)
    self.inst_id = get_instance_id(cur, inst_name)
def make_canonicalizer():
    return DomainCanonicalizer() if get_option(
        "match_domain", False) else DefaultCanonicalizer()
def __init__(self, cur):
    self.cur = cur  # no CursorWrapper methods needed here
    inst_name = get_option("instance", None)
    self.inst_id = get_instance_id(cur, inst_name)
def codingstyle(files, enable_reformat, check_lgpl, check_commits_date):
    source_patterns = common.get_option('codingstyle-hook.source-patterns',
                                        default='*.cpp *.cxx *.c').split()
    header_patterns = common.get_option('codingstyle-hook.header-patterns',
                                        default='*.hpp *.hxx *.h').split()
    misc_patterns = common.get_option('codingstyle-hook.misc-patterns',
                                      default='*.cmake *.txt *.xml *.json').split()
    code_patterns = source_patterns + header_patterns
    include_patterns = code_patterns + misc_patterns

    sort_includes = common.get_option('codingstyle-hook.sort-includes',
                                      default="true", type='--bool') == "true"

    global repoRoot
    repoRoot = common.get_repo_root()

    if repoRoot is None:
        common.warn("Cannot find 'fw4spl' repository structure")
        parent_repo = ""
    else:
        parent_repo = os.path.abspath(os.path.join(repoRoot, os.pardir))

    fw4spl_configured_projects = common.get_option('codingstyle-hook.additional-projects',
                                                   default=None)
    fw4spl_projects = []
    if fw4spl_configured_projects is None:
        # no additional-projects specified in the config file;
        # the default is the parent repository folder
        fw4spl_projects.append(parent_repo)
    else:
        fw4spl_projects = fw4spl_configured_projects.split(";")
        # add the current repository folder to the additional-projects
        # specified in the config file
        fw4spl_projects.append(repoRoot)

    # normalize pathnames
    fw4spl_projects = list(map(os.path.normpath, fw4spl_projects))
    # remove duplicates
    fw4spl_projects = list(set(fw4spl_projects))

    global UNCRUSTIFY_PATH
    if common.g_uncrustify_path_arg is not None and len(common.g_uncrustify_path_arg) > 0:
        UNCRUSTIFY_PATH = common.g_uncrustify_path_arg
    else:
        UNCRUSTIFY_PATH = common.get_option('codingstyle-hook.uncrustify-path',
                                            default=UNCRUSTIFY_PATH,
                                            type='--path').strip()

    common.note('Using uncrustify: ' + UNCRUSTIFY_PATH)

    if common.execute_command(UNCRUSTIFY_PATH + ' -v -q').status != 0:
        common.error('Failed to launch uncrustify.\n')
        return []

    checked = set()
    reformatted_list = []

    sortincludes.find_libraries_and_bundles(fw4spl_projects)

    ret = False
    count = 0
    reformat_count = 0
    for f in files:
        if f in checked or not any(f.fnmatch(p) for p in include_patterns):
            continue

        content = f.contents
        if not common.binary(content):
            # Do this last because the contents of the file will be modified
            # by uncrustify; thus the variable content will no longer reflect
            # the real content of the file
            file_path = os.path.join(repoRoot, f.path)
            if os.path.isfile(file_path):
                res = format_file(file_path, enable_reformat, code_patterns,
                                  header_patterns, misc_patterns, check_lgpl,
                                  sort_includes, f.status, check_commits_date)
                count += 1
                if res == FormatReturn.Modified:
                    reformatted_list.append(f.path)
                    reformat_count += 1
                elif res == FormatReturn.Error:
                    # error in reformatting
                    ret = True
        checked.add(f)

    common.note('%d file(s) checked, %d file(s) reformatted.' % (count, reformat_count))

    return ret, reformatted_list
def __init__(self, single_action, conn, cur):
    VolumeHolder.__init__(self)
    HostCheck.__init__(self, cur)
    inst_name = get_option("instance", None)
    self.instance_id = get_instance_id(
        cur, inst_name)  # self.inst_id already used by HostCheck
    self.mem_cache = MemCache(
        int(get_option('parse_cache_high_mark', "2000")),
        int(get_option('parse_cache_low_mark', "1000")))
    if get_option('download_preference', 'novelty') == 'novelty':
        self.preference = NoveltyPreference(
            int(get_option('novelty_high_mark', "20000")),
            int(get_option('novelty_low_mark', "15000")))
    else:
        self.preference = BreathPreference()
    self.single_action = single_action
    self.conn = conn
    self.notification_threshold = int(
        get_option('parse_notification_threshold', "1000"))
    page_limit = get_option("page_limit", None)
    self.page_limit = int(page_limit) if page_limit else None
    self.page_count = 0
    self.max_url_len = int(get_option("max_url_len", "512"))
    # ignore case flag would be better dynamic, but Python 3.5.2
    # doesn't support that...
    url_blacklist_rx = get_option("url_blacklist_rx", "[.](?:jpe?g|pdf|png)$")
    self.url_blacklist_rx = re.compile(url_blacklist_rx, re.I) if url_blacklist_rx else None
    url_whitelist_rx = get_option("url_whitelist_rx", None)
    self.url_whitelist_rx = re.compile(url_whitelist_rx, re.I) if url_whitelist_rx else None
    self.comp_param = True if get_option("comp_param", True) else False
    if self.comp_param:
        self.cur.execute("""select nameval from param_blacklist order by nameval""")
        rows = self.cur.fetchall()
        self.param_blacklist = set((row[0] for row in rows))
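# Quick illustration of the default URL blacklist above (compiled with
# re.I, so extension case doesn't matter):
#
#   rx = re.compile("[.](?:jpe?g|pdf|png)$", re.I)
#   bool(rx.search("http://example.com/scan.JPEG"))  # True  -> URL skipped
#   bool(rx.search("http://example.com/page.html"))  # False -> URL kept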
#!/usr/bin/python3

import gzip
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from io import BytesIO
from psycopg2 import pool
import re
import shutil
import sys
from common import get_mandatory_option, get_option, schema
from storage_bridge import StorageBridge
from volume_bridge import VolumeBridge

the_pool = pool.ThreadedConnectionPool(database='ampelopsis',
                                       host=get_option('dbhost', 'localhost'),
                                       user=get_mandatory_option('dbuser'),
                                       password=get_mandatory_option('dbpass'),
                                       minconn=0,
                                       maxconn=int(
                                           get_option('own_max_num_conn', "4")))


def get_connection():
    conn = the_pool.getconn()
    conn.autocommit = True
    if schema:
        with conn.cursor() as cur:
            cur.execute("set search_path to " + schema)
    return conn
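# get_connection above borrows from the pool; a matching helper for handing
# connections back would presumably look like this (the name put_connection
# is an assumption -- the rest of the module is not shown):
def put_connection(conn):
    the_pool.putconn(conn)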