def solve(self, fractal):
    margin = cfg.getint('Plot', 'margin')
    size = cfg.getint('Plot', 'size')
    spacing = cfg.getint('Solver', 'service_grid_spacing')

    def in_bounds(p):
        if p.x < margin or p.x > (size - margin):
            return False
        if p.y < margin or p.y > (size - margin):
            return False
        return True

    def service_level(eval_point):
        # squared distance from the evaluation point to the nearest in-bounds point
        return min((eval_point[0] - p.x) ** 2 + (eval_point[1] - p.y) ** 2
                   for p in valid_point_set)

    point_set = fractal.point_set(1)
    # materialise the filter: it is consumed by service_level and measured with len()
    valid_point_set = list(filter(in_bounds, point_set))
    eval_set = [(x, y)
                for x in range(margin, size - margin, spacing)
                for y in range(margin, size - margin, spacing)]

    fractal.service_penalty = float(sum(map(service_level, eval_set)) / len(eval_set))
    fractal.length_penalty = 1.0  # max(1.0, (fractal.total_length / len(point_set)))
    fractal.complexity_penalty = 1.0  # int(max(0, len(point_set))) ** 2
    fractal.bounds_penalty = max(1, (len(point_set) - len(valid_point_set)) * 100) ** 2
    fractal.fitness = 1.0 / (fractal.service_penalty
                             + fractal.length_penalty
                             + fractal.complexity_penalty
                             + fractal.bounds_penalty)
    # print("fitness: %s" % fractal.fitness)
    return fractal
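# A minimal usage sketch, assuming `solver` is the object defining solve()
# above and `population` is an iterable of Fractal instances (both names
# hypothetical): solve() writes the penalty fields onto each fractal and
# returns it, so the fittest individual falls out of a single max().
def fittest_of(solver, population):
    scored = [solver.solve(fractal) for fractal in population]
    return max(scored, key=lambda fractal: fractal.fitness)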
class Judger(object):
    __SmallFileMaxSize = cfg.getint('ScanFile', 'SmallFile') * 1024
    __LastUpdateSeconds = cfg.getint('ScanFile', 'LastUpdate') * 3600
    __CodecCheckSize = cfg.getint('ScanFile', 'CodecCheck')
    __ExcludedExtensions = cfg.get('ScanFile', 'ExcludedExt').lower().split()

    @classmethod
    def filter(cls, file_fullname):
        try:
            size = path.getsize(file_fullname)
            last_update = path.getmtime(file_fullname)
            if time() - last_update > cls.__LastUpdateSeconds:  # not updated for a long time
                return 1
            if win and last_update <= path.getctime(file_fullname):
                # never updated after creation (Linux has no creation time)
                return 1
            if size < cls.__SmallFileMaxSize:  # too small to look like a production log
                return 2
            if file_fullname[file_fullname.rfind('.'):].lower() in cls.__ExcludedExtensions:
                return 3  # known non-log file extension
            if (not win) and access(file_fullname, X_OK):  # unix executable, not a log
                return 4
            with open(file_fullname, 'rb') as fp:  # not a text file, not a log
                if size > cls.__CodecCheckSize * 2:
                    fp.seek(int(size / 2))  # sampling the middle of the file is more reliable
                charset = detect(fp.read(cls.__CodecCheckSize))
                if charset['confidence'] < 0.5:
                    return 5
                return charset
        except Exception as err:
            log.warning(file_fullname + '\t' + str(err))
            return 0
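# A minimal sketch of how the return value is consumed (mirroring
# Scanner.run() below): an int means "rejected, with a reason code",
# anything else is the chardet result dict for an accepted candidate.
# `err_counters` here is a hypothetical list of per-reason tallies.
def classify(file_fullname, err_counters):
    rc = Judger.filter(file_fullname)
    if type(rc) is int:
        err_counters[rc] += 1      # rejected: the code indexes the counters
        return None
    return rc['encoding']          # accepted: the detected codec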
def proxyMain():
    port = cfg.getint('http', 'proxy_port')
    # configparser takes defaults via the keyword-only `fallback` argument
    numThreads = cfg.getint('http', 'proxy_threads', fallback=1)
    server_address = ('', port)
    global proxy_httpd
    proxy_httpd = httpserver.PooledHTTPServer(server_address,
                                              proxyhandler.ProxyHandler,
                                              numThreads)
    log.info('Proxy: %s', proxy_httpd.report_config())
    proxy_httpd.serve_forever()
def indexMain():
    interval = cfg.getint('indexing', 'interval', fallback=3)
    log.info('Scheduled index thread to run every %s minutes' % interval)
    # Event.wait() returns True as soon as the flag is set, so the loop
    # wakes immediately on shutdown instead of sleeping out the interval.
    while not _shutdownEvent.wait(interval * 60):
        if _shutdownEvent.is_set():  # re-check in case of a late shutdown
            break
        qmsg_processor.backgroundIndexTask()
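# A minimal, self-contained sketch of the shutdown pattern used above:
# a single threading.Event doubles as sleep timer and stop signal, so
# stop.set() wakes the worker at once rather than after the interval.
import threading

stop = threading.Event()

def periodic(task, interval):
    # wait() returns False on timeout (keep looping) and True once set
    while not stop.wait(interval):
        task()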
def register(self, ptype):
    """Register ptype as a local typedef."""
    # Too many of them leads to memory burst.
    if len(self.typedefs) < cfg.getint('typing', 'max_combiner'):
        self.typedefs.append(ptype)
        return True
    return False
def adminMain():
    port = cfg.getint('http', 'admin_port')
    server_address = ('', port)
    global admin_httpd
    admin_httpd = httpserver.HTTPServer(server_address,
                                        app_httpserver.AppHTTPRequestHandler)
    log.info("Start admin on '%s' port %s" % server_address)
    app_httpserver.log.info('app_httpserver setup: docBase=%s',
                            app_httpserver.AppHTTPRequestHandler.docBase)
    admin_httpd.serve_forever()
def normalised_segment_set(self):
    origin = OriginPoint(self)
    depth_limit = cfg.getint("Fractal", "recursion_limit")

    def recurse(base):
        r = {"x": base.x, "y": base.y, "children": []}
        if base.depth < depth_limit and not base.terminate():
            for segment in base.segments():
                end_point = segment.end()
                r["children"].append(recurse(end_point))
        return r

    return recurse(origin)
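# The tree above is plain dicts and lists, so it serialises directly;
# a minimal sketch (assuming `fractal` is an instance of this class):
import json

def dump_segments(fractal):
    return json.dumps(fractal.normalised_segment_set(), indent=2)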
def point_set(self, every=1):
    self.total_length = 1
    origin = OriginPoint(self)
    points = []
    depth_limit = cfg.getint("Fractal", "recursion_limit")

    def recurse(base):
        if base.depth % every == 0:
            points.append(base)
        if base.depth < depth_limit and not base.terminate():
            segments = base.segments()
            # accumulate the length of the first segment at each node
            self.total_length += segments[0].length()
            for segment in segments:
                recurse(segment.end())

    recurse(origin)
    return points
def __init__(self):
    self.fittest = None
    self.generation = 0

    initial_solution = Solution()
    initial_solution.length_function = Expression(
        init_terms=[createTerm('Constant', innerMultiplier=3.0, outerMultiplier=3.0)])
    initial_solution.radiance_function = Expression(
        init_terms=[createTerm('Constant', innerMultiplier=1.0, outerMultiplier=1.5)])
    initial_solution.orientation_function = Expression(
        init_terms=[createTerm('Constant', innerMultiplier=-0.1, outerMultiplier=0.1)])
    initial_solution.termination_function = Expression(
        init_terms=[createTerm('Constant', innerMultiplier=3.0, outerMultiplier=3.0)])
    self.solutions = [initial_solution]

    workers = cfg.getint('FitnessTest', 'workers')
    if workers > 1:
        print("Evaluating using {workers} worker threads".format(workers=workers))
        # Pool.map (not map_async) so self.map returns results like the builtin
        self.map = Pool(processes=workers).map
    else:
        self.map = map
    self.max_fitness_acheived = 0
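# A hedged sketch of what the pluggable map enables (`population` is an
# instance of this class; `solver` and `fractals` are hypothetical names):
# the same call runs serially or across the pool without branching.
def evaluate(population, solver, fractals):
    return list(population.map(solver.solve, fractals))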
def generate(self, ctx):
    # gather all underlying types and make sure they do not appear twice
    mct = cfg.getint('typing', 'max_container_type')
    all_types = self.all_types()

    def is_indexable(t):
        return type(t) is IndexableType

    def is_container(t):
        return type(t) is ContainerType

    def is_other(t):
        return not is_indexable(t) and not is_container(t)

    it = filter(is_other, all_types)
    ot0 = filter(is_indexable, all_types)
    ot1 = filter(is_container, all_types)
    icombined = sorted(set(ctx(t).generate(ctx) for t in it))
    # keep only the last `mct` of each container kind to bound combination size
    lcombined0 = sorted(set(ctx(t).generate(ctx) for t in ot0))[-mct:]
    lcombined1 = sorted(set(ctx(t).generate(ctx) for t in ot1))[-mct:]
    combined = icombined + lcombined0 + lcombined1
    if len(combined) == 1:
        return combined[0]
    return 'typename __combined<{0}>::type'.format(",".join(combined))
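# Illustrative only (the type names are hypothetical; the real strings
# depend on ctx and the type instances): with one scalar and two container
# candidates, the final format line emits a C++ __combined instantiation.
combined = ['long', 'container<A>', 'container<B>']
print('typename __combined<{0}>::type'.format(",".join(combined)))
# -> typename __combined<long,container<A>,container<B>>::type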
def mutate(self):
    mutate_count = 0
    # maybe delete a term
    if self.terms and random() > (1 - (cfg.getfloat('Function', 'term_deletion_chance')
                                       * len(self.terms))):
        self._delete_term()
    # potentially create a new term; always do so if there are none
    if not self.terms or random() > (1 - cfg.getfloat('Function', 'term_creation_chance')):
        self._add_term()
    # modify some term constants
    if self.terms:
        while mutate_count < cfg.getint('Mutator', 'number_of_terms'):
            if random() > cfg.getfloat('Mutator', 'probability'):
                t = choice(self.terms)
                t.mutate()
                mutate_count += 1
def draw(self, seq):
    # (0,150,255) [0x0096ff] -> (42,22,69) [0x45162a]
    def colour_lookup(ratio, shade=False):
        r = 0 + (ratio * (42 - 0))
        g = 150 + (ratio * (22 - 150))
        b = 255 + (ratio * (69 - 255))
        if shade:
            r /= 3.0
            g /= 3.0
            b /= 3.0
        return "rgb({},{},{})".format(int(r), int(g), int(b))

    # cache the config lookups used repeatedly below
    size = cfg.getint("Plot", "size")
    margin = cfg.getint("Plot", "margin")
    im = Image.new("RGBA", (size, size), (10, 4, 27, 255))
    draw = ImageDraw.Draw(im)
    # plot frame
    draw.line(
        (
            (margin, margin),
            (margin, size - margin),
            (size - margin, size - margin),
            (size - margin, margin),
            (margin, margin),
        ),
        fill="rgb(24,12,54)",
    )

    points = self.fractal.point_set()
    # sort by depth so oldest segments are drawn on top
    points.sort(key=lambda p: -p.depth)

    # for point in points:
    #     fill = colour_lookup(float(point.depth) / (points[0].depth + 1), shade=True)
    #     service_x = (point.x // constants.SERVICE_GRID_SPACING) * constants.SERVICE_GRID_SPACING
    #     service_y = (point.y // constants.SERVICE_GRID_SPACING) * constants.SERVICE_GRID_SPACING
    #     draw.rectangle(
    #         (service_x + 1, service_y + 1,
    #          service_x + constants.SERVICE_GRID_SPACING - 1,
    #          service_y + constants.SERVICE_GRID_SPACING - 1),
    #         fill=fill  # "rgb(25,20,37,20)"
    #     )

    for point in points:
        fill = colour_lookup(float(point.depth) / (points[0].depth + 1))
        for segment in point.segments():
            end = segment.end()
            # bounds check needs ints, so cfg.getint rather than cfg.get
            if 0 <= end.x <= size and 0 <= end.y <= size:
                draw.line((point.x, point.y, end.x, end.y), fill=fill)

    im.save("output/out." + str(seq) + ".png", "PNG")
from math import hypot, sin, cos, pi
from sys import float_info

from config import cfg

# calls to config seem to be slow, so 'cache' these here
branch_every = cfg.getint("Branch", "every")
branch_segments = cfg.getint("Branch", "segments")
origin_x = cfg.getint("Fractal", "origin_x")
origin_y = cfg.getint("Fractal", "origin_y")


class Point(object):

    def __unicode__(self):
        return str(self.x) + ", " + str(self.y)

    def __init__(self, x, y, fractal, depth, parent_orientation):
        self.x = int(x)
        self.y = int(y)
        self.fractal = fractal
        self.depth = depth
        self.dist_to_origin = self._dist_to_origin()
        self.parent_orientation = parent_orientation
        # branch into several segments every `branch_every` levels
        self.segment_count = 1 if self.depth % branch_every else branch_segments

    def _dist_to_origin(self):
        try:
            # clamp to the plot size; hypot can overflow for extreme coordinates
            return min(cfg.getint("Plot", "size"),
                       hypot(self.x - origin_x, self.y - origin_y),
                       key=abs)
        except OverflowError:
            return float_info.max

    def radiance(self):
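# The module-level cache above matters because Point.__init__ runs once per
# point; a hedged micro-benchmark sketch of the difference (timings depend
# on the cfg implementation):
import timeit

cached = timeit.timeit(lambda: branch_every, number=100000)
direct = timeit.timeit(lambda: cfg.getint("Branch", "every"), number=100000)
print("cached: %.3fs, direct: %.3fs" % (cached, direct))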
class Scanner(object):
    __SleepSeconds = cfg.getint('ScanFile', 'Sleep')
    __MaxFiles = cfg.getint('ScanFile', 'MaxFiles')
    __MaxSeconds = cfg.getint('ScanFile', 'MaxSeconds')

    # choose the initial scan roots according to the operating system
    if win:
        __InitialPaths = [chr(i) + ':\\' for i in range(0x61, 0x7b)  # a: .. z:
                          if os.path.isdir(chr(i) + ':\\')]
        __ExcludedPaths = cfg.get('ScanFile', 'ExcludedWin').lower().split()
    else:
        __InitialPaths = ['/']
        __ExcludedPaths = cfg.get('ScanFile', 'ExcludedUnix').lower().split()

    def __init__(self, sample_list_file=os.path.join(cfg.get('Log', 'Folder'), 'samples.lst')):
        self.__SampleListFile = sample_list_file

    def run(self):
        with open(self.__SampleListFile, 'w', encoding='utf-8') as fp:
            scanned_files, sampled_files, err_counters = 0, 0, [0, 0, 0, 0, 0, 0]
            for initial_path in self.__InitialPaths:
                for dir_path, dir_names, file_names in os.walk(initial_path):
                    # skip excluded directories and their subdirectories
                    if any(match(excluded_path, dir_path)
                           for excluded_path in self.__ExcludedPaths):
                        dir_names[:] = []
                        continue
                    if not os.access(dir_path, os.X_OK | os.R_OK):
                        # otherwise the walk descends into it regardless
                        log.warning('[Permission Denied:] ' + dir_path)
                        continue
                    # prune subdirectories we cannot enter (iterate over a copy,
                    # since we remove from dir_names) and log a warning
                    for dir_name in list(dir_names):
                        dir_fullname = os.path.join(dir_path, dir_name)
                        if not os.access(dir_fullname, os.X_OK | os.R_OK):
                            dir_names.remove(dir_name)
                            log.warning('[Permission denied:] ' + dir_fullname)
                    if len(file_names) > self.__MaxFiles:
                        # that many files in one directory is most likely data, not logs
                        log.warning('[Too Many Files](' + str(len(file_names)) + '), Ignoring: ' + dir_path)
                        continue
                    timer = time.time()
                    for file_name in file_names:
                        try:
                            scanned_files += 1
                            if scanned_files % 1000 == 0:
                                log.info(
                                    'Files scanned:[%d], error[%d], inactive[%d], small[%d], wrong-type[%d], non-text[%d], candidate[%d]\t%s'
                                    % (scanned_files, err_counters[0], err_counters[1],
                                       err_counters[2], err_counters[3],
                                       err_counters[4] + err_counters[5],
                                       sampled_files, dir_path))
                            if time.time() - timer > self.__MaxSeconds:  # folder too slow to scan
                                log.warning('[Too slow to scan, Ignoring:] ' + dir_path)
                                break
                            time.sleep(self.__SleepSeconds)  # avoid hogging system resources
                            file_fullname = os.path.join(dir_path, file_name)
                            rc = Judger.filter(file_fullname)
                            if type(rc) is int:  # not a candidate log file, skip it
                                err_counters[rc] += 1
                                continue
                            print(file_fullname, file=fp)
                            sampled_files += 1
                        except Exception as err:
                            # garbled directory/file names have raised codec errors when writing fp
                            log.error(str(err))
        log.info(
            'Finish scan:[%d], error[%d], inactive[%d], small[%d], wrong-type[%d], non-text[%d], candidate[%d]'
            % (scanned_files, err_counters[0], err_counters[1], err_counters[2],
               err_counters[3], err_counters[4] + err_counters[5], sampled_files))
class Sampler(object):
    __CodecCheckSize = cfg.getint('ScanFile', 'CodecCheck')
    __StartLine = cfg.getint('Sample', 'StartingLine')
    __EndLine = __StartLine + cfg.getint('Sample', 'SampleLines')
    __MaxSize = cfg.getint('Sample', 'MaxSize') * 1024 * 1024
    __OutputPath = cfg.get('Sample', 'DataPath')
    __OutputFormat = cfg.getint('Sample', 'Format')
    __RegularExpFrom = cfg.get('Sample', 'From')
    __RegularExpTo = cfg.get('Sample', 'To')

    if os.path.exists(__OutputPath):
        rmtree(__OutputPath)
        sleep(1)  # recreating the directory immediately after rmtree can fail
    os.mkdir(__OutputPath)

    @classmethod
    def sample(cls, files_list):
        log.info('Starting Samples %d files' % len(files_list))
        if cls.__OutputFormat == 0:
            cls.__merge(files_list)
        else:
            cls.__copy(files_list)

    @classmethod
    def __merge(cls, file_fullnames):
        # write each listed file as a single line into <ip-address>.samples.dat
        if win:
            output_filename = gethostbyname(gethostname()) + '.samples.dat'
        else:
            cmd = "ifconfig|grep 'inet addr:'|grep -v '127.0.0.1'|cut -d: -f2|awk '{print $1}'|head -1"
            output_filename = os.popen(cmd).read().strip() + '.samples.dat'
        with open(os.path.join(cls.__OutputPath, output_filename), 'w', encoding='utf-8') as fp:
            for file_fullname in file_fullnames:
                log.info('Sampling ' + file_fullname)
                current_position = fp.tell()
                try:
                    fp.write('\n' + file_fullname + '\t')
                    for line in cls.__readLine(file_fullname):
                        fp.write(line.replace('\n', '\0'))
                except Exception as err:
                    log.warning(file_fullname + '\t' + str(err))
                    fp.seek(current_position)  # discard the partial record
                    continue

    @classmethod
    def __copy(cls, file_fullnames):
        output_file = ''
        for input_file in file_fullnames:
            log.info('Sampling ' + input_file)
            try:
                if cls.__OutputFormat == 2:
                    # save samples in a mirrored directory structure
                    if win:
                        curr_path = cls.__OutputPath + os.sep + os.path.split(input_file)[0].replace(':', '_')
                    else:
                        curr_path = cls.__OutputPath + os.path.split(input_file)[0]
                    os.makedirs(curr_path, exist_ok=True)
                    output_file = os.path.join(curr_path, os.path.split(input_file)[1])
                else:
                    # save in a single directory, encoding the original path in the file name
                    file_name = input_file.replace(os.sep, '_').replace(':', '_')
                    output_file = cls.__OutputPath + '/' + file_name
                with open(output_file, 'w', encoding='utf-8') as fp:
                    for line in cls.__readLine(input_file):
                        fp.write(line)
            except Exception as err:
                log.warning(input_file + '\t' + str(err))
                if os.path.exists(output_file):
                    os.remove(output_file)
                continue

    @classmethod
    def __readLine(cls, file_fullname, encoding='ascii'):
        with open(file_fullname, 'rb') as fp:
            size = os.path.getsize(file_fullname)
            if size > cls.__MaxSize:
                # multi-GB files with huge first lines have exhausted memory before
                fp.seek(-cls.__MaxSize, 2)
            for lines, line_binary in enumerate(fp):
                if lines < cls.__StartLine:
                    continue
                if lines > cls.__EndLine:
                    break
                try:
                    line = line_binary.decode(encoding=encoding)
                    log.debug(str(lines) + ' ' + line)
                    if cls.__RegularExpFrom != '':
                        line = re.sub(cls.__RegularExpFrom, cls.__RegularExpTo, line)
                    yield line
                except UnicodeDecodeError:
                    # detect on a bounded prefix: a single 10MB line once stalled this function
                    encoding = detect(line_binary[:cls.__CodecCheckSize])['encoding']
                    if encoding is None:
                        raise  # undetectable encoding: propagate to the caller
                    line = line_binary.decode(encoding=encoding)
                    if cls.__RegularExpFrom != '':
                        line = re.sub(cls.__RegularExpFrom, cls.__RegularExpTo, line)
                    yield line
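# A hedged end-to-end sketch tying the two classes together, assuming the
# default file layout above: Scanner.run() writes candidate paths one per
# line to samples.lst, which then feeds Sampler.sample().
def scan_and_sample():
    scanner = Scanner()
    scanner.run()
    with open(os.path.join(cfg.get('Log', 'Folder'), 'samples.lst'), encoding='utf-8') as fp:
        files_list = [line.strip() for line in fp if line.strip()]
    Sampler.sample(files_list)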
def _add_term(self):
    if len(self.terms) < cfg.getint('Function', 'maximum_terms'):
        t = createTerm('Random')
        self.terms.append(t)