def patch(ngx_src, patch_path):
    '''Patch the nginx source tree, trying patch levels -p0 through -p3.'''
    work_dir = os.path.abspath(os.curdir)
    os.chdir(ngx_src)
    abs_patch_path = os.path.abspath(patch_path)
    cmds = ['patch -p0 -t -s < ' + abs_patch_path,
            'patch -p1 -t -s < ' + abs_patch_path,
            'patch -p2 -t -s < ' + abs_patch_path,
            'patch -p3 -t -s < ' + abs_patch_path]
    for cmd in cmds:
        print cmd
        f = os.popen(cmd)
        output = f.read()
        if 'Skipping' in output:
            # wrong -p level: every hunk was skipped, try the next level
            continue
        else:
            # the patch applied at this level
            os.chdir(work_dir)
            return
    # none of the patch levels applied
    os.chdir(work_dir)
    answer = raw_input("Patch failed, continue?(yes/no)")
    if answer == "yes":
        print "continue.."
        return
    else:
        error_exit('Patch failed!')
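# Note: parsing patch's textual output ('Skipping') is fragile. A minimal
# alternative sketch, relying on the exit status of `patch --dry-run` instead.
# The helper name is hypothetical; it assumes the same abs_patch_path and the
# GNU patch command-line tool.
import subprocess

def find_patch_level(ngx_src, abs_patch_path):
    # Probe -p0..-p3 with --dry-run and return the first level whose exit
    # status is 0, or None if no level applies cleanly.
    for level in range(4):
        cmd = 'patch -p{0} -t -s --dry-run < {1}'.format(level, abs_patch_path)
        if subprocess.call(cmd, shell=True, cwd=ngx_src) == 0:
            return level
    return None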
def main():
    es_endpoint_ip = ctx.node.properties['es_endpoint_ip']
    es_endpoint_port = ctx.node.properties['es_endpoint_port']

    if not es_endpoint_ip:
        es_endpoint_ip = ctx.instance.host_ip
        _install_elasticsearch()

        utils.systemd.start('elasticsearch')
        utils.wait_for_port(es_endpoint_port, es_endpoint_ip)
        _configure_elasticsearch(host=es_endpoint_ip, port=es_endpoint_port)
        utils.systemd.stop('elasticsearch')
        utils.clean_var_log_dir('elasticsearch')
    else:
        ctx.logger.info('External Elasticsearch Endpoint provided: '
                        '{0}:{1}...'.format(es_endpoint_ip, es_endpoint_port))
        time.sleep(5)
        utils.wait_for_port(es_endpoint_port, es_endpoint_ip)
        ctx.logger.info('Checking if \'cloudify_storage\' '
                        'index already exists...')

        if http_request('http://{0}:{1}/cloudify_storage'.format(
                es_endpoint_ip, es_endpoint_port), method='HEAD'):
            utils.error_exit('\'cloudify_storage\' index already exists on '
                             '{0}, terminating bootstrap...'.format(
                                 es_endpoint_ip))
        _configure_elasticsearch(host=es_endpoint_ip, port=es_endpoint_port)

    ctx.instance.runtime_properties['es_endpoint_ip'] = es_endpoint_ip
def resumable_upload(self, insert_request):
    '''
    This method implements an exponential backoff strategy to resume a
    failed upload.
    '''
    response = None
    error = None
    retry = 0
    while response is None:
        try:
            status, response = insert_request.next_chunk()
            if 'id' in response:
                print('''Video ID `%s' was successfully uploaded. \
Its visibility is set to `%s'.''' % (response['id'],
                                     self.settings['privacy']))
                print('''URL of the newly uploaded video: \
<https://www.youtube.com/watch?v=%s>''' % response['id'])
                print('''It may take some time for the video to \
finish processing; typically 1-10 minutes.''')
            else:
                error_exit('''The upload failed with an unexpected \
response: %s''' % response)
        except HttpError, e:
            if e.resp.status in self.retriable_status_codes:
                error = '''A retriable HTTP error %d occurred:\n%s''' % (
                    e.resp.status,
                    e.content
                )
            else:
                raise
        except self.retriable_exceptions, e:
            error = 'A retriable error occurred: %s' % e
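# The backoff step that the docstring refers to is not shown in the snippet
# above. A minimal sketch of such a step (the helper name and default
# max_retries are hypothetical; it reuses the error_exit helper used
# elsewhere in this code): abort after max_retries, otherwise sleep for a
# random interval whose upper bound doubles with every failed attempt.
import random
import time

def exponential_backoff_sleep(retry, max_retries=10):
    if retry > max_retries:
        error_exit('No longer attempting to retry.')
    max_sleep = 2 ** retry
    sleep_seconds = random.random() * max_sleep
    print('Sleeping %f seconds and then retrying...' % sleep_seconds)
    time.sleep(sleep_seconds)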
def _set_security(rabbitmq_ssl_enabled,
                  rabbitmq_cert_private,
                  rabbitmq_cert_public):
    # Deploy certificates if both have been provided.
    # Complain loudly if one has been provided and the other hasn't.
    if rabbitmq_ssl_enabled:
        if rabbitmq_cert_private and rabbitmq_cert_public:
            utils.deploy_ssl_certificate(
                'private', '/etc/rabbitmq/rabbit-priv.pem',
                'rabbitmq', rabbitmq_cert_private)
            utils.deploy_ssl_certificate(
                'public', '/etc/rabbitmq/rabbit-pub.pem',
                'rabbitmq', rabbitmq_cert_public)
            # Configure for SSL
            utils.deploy_blueprint_resource(
                '{0}/rabbitmq.config-ssl'.format(CONFIG_PATH),
                '/etc/rabbitmq/rabbitmq.config')
        else:
            utils.error_exit('When providing a certificate for rabbitmq, '
                             'both public and private certificates must be '
                             'supplied.')
    else:
        utils.deploy_blueprint_resource(
            '{0}/rabbitmq.config-nossl'.format(CONFIG_PATH),
            '/etc/rabbitmq/rabbitmq.config')
        if rabbitmq_cert_private or rabbitmq_cert_public:
            ctx.logger.warn('Broker SSL cert supplied but SSL not enabled '
                            '(broker_ssl_enabled is False).')
def process(arguments):
    access_log = arguments['--access-log']
    log_format = arguments['--log-format']
    if access_log is None and not sys.stdin.isatty():
        # assume logs can be fetched directly from stdin when piped
        access_log = 'stdin'
    if access_log is None:
        access_log, log_format = detect_log_config(arguments)

    logging.info('access_log: %s', access_log)
    logging.info('log_format: %s', log_format)
    if access_log != 'stdin' and not os.path.exists(access_log):
        error_exit('access log file "%s" does not exist' % access_log)

    if arguments['info']:
        print('nginx configuration file:\n ', detect_config_path())
        print('access log file:\n ', access_log)
        print('access log format:\n ', log_format)
        print('available variables:\n ',
              ', '.join(sorted(extract_variables(log_format))))
        return

    source = build_source(access_log, arguments)
    pattern = build_pattern(log_format)
    processor = build_processor(arguments)
    setup_reporter(processor, arguments)
    process_log(source, pattern, processor, arguments)
def load(self, json_path):
    if json_path is None:
        json_path = self.json_path
    if not os.path.exists(json_path):
        error_exit("Json file not found!")
    with open(json_path, 'r') as f:
        self.info = json.load(f)
def publish(self, server):
    data = json.dumps(self.info)
    try:
        req = urllib2.Request(server)
        response = urllib2.urlopen(req, data)
    except Exception as e:
        print e
        error_exit("Publish failed")
    return response.read()
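# Because a `data` argument is passed to urllib2.urlopen(), this is an HTTP
# POST: the JSON-encoded self.info becomes the request body. A usage sketch
# (the server URL is hypothetical):
#   publisher.publish('http://example.com/api/modules')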
def install_logstash():

    logstash_unit_override = '/etc/systemd/system/logstash.service.d'

    logstash_source_url = ctx.node.properties['logstash_rpm_source_url']

    rabbitmq_username = ctx.node.properties['rabbitmq_username']
    rabbitmq_password = ctx.node.properties['rabbitmq_password']

    logstash_log_path = '/var/log/cloudify/logstash'
    logstash_conf_path = '/etc/logstash/conf.d'

    # injected as an input to the script
    ctx.instance.runtime_properties['es_endpoint_ip'] = \
        os.environ.get('ES_ENDPOINT_IP')
    ctx.instance.runtime_properties['rabbitmq_endpoint_ip'] = \
        utils.get_rabbitmq_endpoint_ip()

    # Confirm username and password have been supplied for broker before
    # continuing. Components other than logstash and riemann have this
    # handled in code. Note that these are not directly used in this script,
    # but are used by the deployed resources, hence the check here.
    if not rabbitmq_username or not rabbitmq_password:
        utils.error_exit(
            'Both rabbitmq_username and rabbitmq_password must be supplied '
            'and at least 1 character long in the manager blueprint inputs.')

    ctx.logger.info('Installing Logstash...')
    utils.set_selinux_permissive()
    utils.copy_notice('logstash')

    utils.yum_install(logstash_source_url)

    utils.mkdir(logstash_log_path)
    utils.chown('logstash', 'logstash', logstash_log_path)

    ctx.logger.info('Creating systemd unit override...')
    utils.mkdir(logstash_unit_override)
    utils.deploy_blueprint_resource(
        '{0}/restart.conf'.format(CONFIG_PATH),
        '{0}/restart.conf'.format(logstash_unit_override))

    ctx.logger.info('Deploying Logstash conf...')
    utils.deploy_blueprint_resource(
        '{0}/logstash.conf'.format(CONFIG_PATH),
        '{0}/logstash.conf'.format(logstash_conf_path))

    ctx.logger.info('Deploying Logstash sysconfig...')
    utils.deploy_blueprint_resource(
        '{0}/logstash'.format(CONFIG_PATH),
        '/etc/sysconfig/logstash')

    utils.logrotate('logstash')
    utils.sudo(['/sbin/chkconfig', 'logstash', 'on'])
    utils.clean_var_log_dir('logstash')
def _get_path_from_url(url):
    """
    Get the repository name for a repo [url].
    [url] must be a git url.
    """
    try:
        repo = re.split("/", url)[-1]
        name = re.split(r"\.", repo)[0]
    except Exception:
        error_exit("git url error, check the git url.")
    return name
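# Usage sketch (the URL is hypothetical): the last path segment is
# 'nginx.git', and splitting on '.' keeps 'nginx'.
#   name = _get_path_from_url('https://github.com/nginx/nginx.git')
#   print name  # -> 'nginx'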
def __parse_wiki(self, url):
    """
    Parse the wiki site to get module information.
    """
    print "Begin to get information on wiki site, please wait ........."
    req = urllib2.Request(url)
    try:
        html = urllib2.urlopen(url, timeout=20).read()
    except Exception:
        error_exit("url open error!\n")
    # To Do: encode
    content = BeautifulSoup(html)
    table = content.select("[class~=modules-index-table]")[0]
    module_table = []
    ptrs = table.find_all("tr")
    for ptr in ptrs[1:]:
        tds = ptr.find_all("td")
        detail = dict()
        if tds[0].a.string:
            detail["name"] = tds[0].a.string.strip()
        else:
            detail["name"] = None
        if tds[1].string:
            detail["description"] = tds[1].string.strip()
        else:
            detail["description"] = None
        # author or authors
        atags = tds[2].find_all('a')
        author = ''
        if len(atags):
            for atag in atags:
                author += atag.string + ' '
        else:
            author = tds[2].string
        detail["author"] = author.strip()
        if len(tds) == 4 and tds[3].find_all("a"):
            if tds[3].a["href"].startswith("/File"):
                detail["link"] = "http://wiki.nginx.org" + tds[3].a["href"]
            else:
                detail["link"] = tds[3].a["href"]
        else:
            detail["link"] = None
        module_table.append(detail)
    return module_table
def detect_custom_log(arguments):
    """
    Get the custom log format specified in a custom config file
    :return: log format
    """
    custom_log = arguments['--custom-log-format']
    if not os.path.exists(custom_log):
        error_exit('Custom format config not found: %s' % custom_log)
    config = ConfigParser.ConfigParser()
    config.read(custom_log)
    log_format = config.get('log_format', 'log_format')
    log_format = log_format.replace('\n', '').replace('\'', '')
    return log_format
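# For reference, the function above reads section [log_format], option
# log_format from an INI-style file and strips newlines and single quotes.
# A minimal, hypothetical example of such a file and the resulting value:
example = """
[log_format]
log_format = '$remote_addr - $remote_user [$time_local] "$request" $status'
"""
with open('custom_format.conf', 'w') as f:
    f.write(example)
# detect_custom_log({'--custom-log-format': 'custom_format.conf'})
# -> '$remote_addr - $remote_user [$time_local] "$request" $status'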
def detect_log_config(arguments):
    """
    Detect access log config (path and format) of nginx. Offer user to select
    if multiple access logs are detected.
    :return: path and format of detected / selected access log
    """
    config = arguments['--config']
    if config is None:
        config = detect_config_path()
    if not os.path.exists(config):
        error_exit('Nginx config file not found: %s' % config)

    with open(config) as f:
        config_str = f.read()
    access_logs = dict(get_access_logs(config_str))
    if not access_logs:
        error_exit('Access log file is not provided and ngxtop cannot '
                   'detect it from your config file (%s).' % config)

    log_formats = dict(get_log_formats(config_str))
    if len(access_logs) == 1:
        log_path, format_name = access_logs.items()[0]
        if format_name == 'combined':
            return log_path, LOG_FORMAT_COMBINED
        if format_name not in log_formats:
            error_exit('Incorrect format name set in config for access log '
                       'file "%s"' % log_path)
        return log_path, log_formats[format_name]

    # multiple access logs configured, offer to select one
    print('Multiple access logs detected in configuration:')
    log_path = choose_one(access_logs.keys(),
                          'Select access log file to process: ')
    format_name = access_logs[log_path]
    if format_name not in log_formats:
        error_exit('Incorrect format name set in config for access log '
                   'file "%s"' % log_path)
    return log_path, log_formats[format_name]
def main():
    if len(sys.argv) > 3 or len(sys.argv) < 2:
        utils.error_exit("Usage: {} [keyword] dataset\n".format(
            os.path.basename(sys.argv[0])))
    if len(sys.argv) == 3 and sys.argv[1] == "name":
        print(os.path.basename(sys.argv[-1]))
    else:
        stats = get_ds_stats(sys.argv[-1])
        if len(sys.argv) == 2:
            print("'{}': {},".format(os.path.basename(sys.argv[-1]), stats))
        else:
            if sys.argv[1] in stats:
                print(stats[sys.argv[1]])
            else:
                utils.error_exit(
                    "Keyword '{}' not recognized\n".format(sys.argv[1]))
def get_ds_stats(dataset, force_compute=False):
    """ Return a dict containing the statistics about the dataset.

    Look up 'dataset' in datasetsinfo.ds_stats. If present, return that dict,
    otherwise compute the stats. See the comment at the beginning of
    compute_ds_stats() for info about the dict."""
    if dataset in datasetsinfo.ds_stats and not force_compute:
        return datasetsinfo.ds_stats[dataset]
    else:
        if not os.path.isfile(dataset):
            utils.error_exit(
                "{} not found in datasetsinfo.py and does not exist or is "
                "not a file\n".format(dataset))
        return compute_ds_stats(dataset)
def load(self):
    if not os.path.exists(self.yaml):
        error_exit("Cannot find yaml file!")
    try:
        info = yaml.load(open(self.yaml, 'r'))
    except yaml.scanner.ScannerError:
        raise PulishError("Wrong format")
    for item in info.keys():
        try:
            self._check_item(info, item, self.rules[item])
        except PulishError as e:
            error_exit(e.message)
    return info
def compile_with_dso(module_src_path,
                     dso_path='/usr/local/nginx/sbin/dso_tool',
                     ngx_include_src='/usr/local/nginx/include'):
    print "Compiling with dso tool"
    cmd = (dso_path + ' --add-module=' + module_src_path +
           ' --nginx-include=' + ngx_include_src)
    try:
        proc = subprocess.Popen(cmd.split(), stdout=open(os.devnull, 'w'),
                                stderr=PIPE)
    except Exception as e:
        error_exit(e.message)
    stdout, stderr = proc.communicate()
    output = stderr.decode('utf-8')
    if 'error' in output:
        err_msg = re.split('error:', output)[-1]
        error_exit(err_msg)
    print "Dso Config Success!"
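# For illustration, with a hypothetical module path the command assembled
# above would be roughly:
#   compile_with_dso('/tmp/ngx_http_foo_module')
#   -> /usr/local/nginx/sbin/dso_tool --add-module=/tmp/ngx_http_foo_module
#      --nginx-include=/usr/local/nginx/include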
def upload_tune(self, audio, image, args, video_ready=False):
    '''
    Uploads a video to Youtube.
    '''
    if not video_ready:
        self.generate_video(audio, image)

    if self.settings['generate_only']:
        print('Skipping Youtube upload.')
        exit()

    # Now upload the file to Youtube.
    print('Authenticating using the Youtube API...')
    try:
        youtube = self.get_authenticated_service(args)
    except httplib2.ServerNotFoundError, e:
        error_exit('%s.' % e)
def main():
    global sample_size
    global population_size
    global dataset

    # Verify arguments
    if len(sys.argv) != 3:
        utils.error_exit("Usage: {} samplesize dataset\n".format(
            os.path.basename(sys.argv[0])))
    dataset = sys.argv[2]
    try:
        sample_size = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))

    ds_stats = getDatasetInfo.get_stats(dataset)
    population_size = ds_stats['size']

    random.seed()

    t = Timer("create_sample()", "from __main__ import create_sample")
    sys.stderr.write("Creating the sample took: {} ms \n".format(
        t.timeit(1) * 1000))
def get_access_log(self):
    """
    Get nginx access.log file path
    :return: access.log file path and log format
    """
    if self.access_log is not None:
        return self.access_log

    self.access_log = self.arguments['--access-log']
    log_format = self.arguments['--log-format']
    if self.access_log is None and not sys.stdin.isatty():
        # assume logs can be fetched directly from stdin when piped
        self.access_log = 'stdin'
    if self.access_log is None:
        self.access_log, log_format = detect_log_config(self.arguments)

    logging.info('access_log: %s', self.access_log)
    logging.info('log_format: %s', log_format)
    if self.access_log != 'stdin' and not os.path.exists(self.access_log):
        error_exit('access log file "%s" does not exist' % self.access_log)
    return self.access_log, log_format
def detect_config_path():
    """
    Get nginx configuration file path based on `nginx -V` output
    :return: detected nginx configuration file path
    """
    try:
        proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
    except OSError:
        error_exit('Access log file or format was not set and nginx config '
                   'file cannot be detected. '
                   'Perhaps nginx is not in your PATH?')

    stdout, stderr = proc.communicate()
    version_output = stderr.decode('utf-8')
    conf_path_match = re.search(r'--conf-path=(\S*)', version_output)
    if conf_path_match is not None:
        return conf_path_match.group(1)

    prefix_match = re.search(r'--prefix=(\S*)', version_output)
    if prefix_match is not None:
        return prefix_match.group(1) + '/conf/nginx.conf'
    return '/etc/nginx/nginx.conf'
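# Illustration of the extraction order on a hypothetical `nginx -V`
# configure-arguments line: --conf-path wins, then --prefix + '/conf/nginx.conf',
# then the hard-coded '/etc/nginx/nginx.conf' fallback.
import re
sample = ('configure arguments: --prefix=/opt/nginx '
          '--conf-path=/opt/nginx/conf/nginx.conf')
print(re.search(r'--conf-path=(\S*)', sample).group(1))
# -> /opt/nginx/conf/nginx.conf
print(re.search(r'--prefix=(\S*)', sample).group(1) + '/conf/nginx.conf')
# -> the fallback form built from --prefix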
def main():
    """ Partition a dataset in two equal parts. """
    # Verify arguments
    if len(sys.argv) != 5:
        utils.error_exit(
            "Usage: {} dataset_size dataset_file expl_file eval_file\n".format(
                os.path.basename(sys.argv[0])))
    dataset = sys.argv[2]
    expl = sys.argv[3]
    eval = sys.argv[4]
    try:
        dataset_size = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))

    random.seed()
    expl_lines = frozenset(random.sample(range(dataset_size),
                                         dataset_size // 2))

    with open(dataset, "rt") as largeFILE, open(expl, "wt") as explFILE, \
            open(eval, "wt") as evalFILE:
        index = 0
        for line in largeFILE:
            if index in expl_lines:
                explFILE.write(line)
            else:
                evalFILE.write(line)
            index += 1
def main():
    if len(sys.argv) != 4:
        utils.error_exit(
            "Usage: {} use_additional_knowledge={{0|1}} delta dataset\n".format(
                sys.argv[0]))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not an integer\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a float\n".format(sys.argv[2]))
    ds_stats = getDatasetInfo.get_ds_stats(sys.argv[3])

    (eps_vc_dim, eps_emp_vc_dim, returned) = epsilon_dataset(
        delta, ds_stats, use_additional_knowledge)
    print("{} {}".format(eps_vc_dim, eps_emp_vc_dim))
    print("{}\t{}".format(min(eps_vc_dim, eps_emp_vc_dim), returned))
def compile_without_dso(module_src_path, ngx_src, prefix, other=''):
    '''
    Call nginx's configure script to compile nginx with the module source
    specified by [module_src_path].
    [module_src_path] is the source directory of the module
    [prefix] is the install prefix of nginx/Tengine, /usr/local/nginx by default
    [other] other configure options
    '''
    config_path = os.path.join(ngx_src, 'configure')
    work_dir = os.path.abspath(os.curdir)
    os.chdir(ngx_src)
    if other is None:
        other = ''
    if not os.path.exists(config_path):
        error_exit('Path not found: ' + config_path)
    print 'configure nginx with module path : %s' % module_src_path

    # decide whether to overwrite the existing nginx binary or do a first install
    first_install = False
    config_options = ''
    # for 'ngx3m reset': if no module is given, ignore [other] and just do
    # ./configure, make, make install
    if module_src_path is None:
        first_install = True
    else:
        config_options = ' --add-module=' + module_src_path
        if not os.path.exists(module_src_path):
            error_exit('Path not found: ' + module_src_path)
    if not os.path.exists(prefix):
        first_install = True

    cmd = config_path + ' --prefix=' + prefix + config_options + ' ' + other
    try:
        proc = subprocess.Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
    except Exception as e:
        error_exit(e.message)
    stdout, stderr = proc.communicate()
    stderr_output = stderr.decode('utf-8')
    stdout_output = stdout.decode('utf-8')
    if 'error:' in stderr_output:
        err_msg = re.split('error:', stderr_output)[-1]
        error_exit(err_msg)
    elif 'error:' in stdout_output:
        err_msg = re.split('error:', stdout_output)[-1]
        error_exit(err_msg)
    elif "Permission denied" in stderr_output:
        error_exit("Permission denied")

    print "make ......."
    try:
        proc1 = subprocess.Popen(["make"], stdout=open(os.devnull, 'w'),
                                 stderr=PIPE)
    except Exception as e:
        os.chdir(work_dir)
        error_exit(e.message)
    stdout, stderr = proc1.communicate()
    output = stderr.decode('utf-8')
    if "***" in output:
        print output
        os.chdir(work_dir)
        error_exit("make failed!")
    elif "Permission denied" in output:
        os.chdir(work_dir)
        error_exit("Permission denied")

    if first_install:
        print "make install......."
        try:
            proc2 = subprocess.Popen(["make", "install"],
                                     stdout=open(os.devnull, 'w'),
                                     stderr=PIPE)
        except Exception as e:
            os.chdir(work_dir)
            error_exit("Compile failed!")
        stdout, stderr = proc2.communicate()
        output = stderr.decode('utf-8')
        if "***" in output or "error" in output:
            print output
            os.chdir(work_dir)
            error_exit("make install failed!")
        elif "Permission denied" in output:
            os.chdir(work_dir)
            error_exit("Permission denied")
    else:
        # back up the running binary and swap in the freshly built one
        ngx_path = os.path.join(prefix, "sbin/nginx")
        ngx_sbin_dir = os.path.join(prefix, "sbin")
        ngx_path_bak = os.path.join(prefix, "sbin/nginx.old")
        ngx_path_new = "./objs/nginx"
        os.rename(ngx_path, ngx_path_bak)
        shutil.copy(ngx_path_new, ngx_sbin_dir)

    os.chdir(work_dir)
    print "compile succeed!"
    return config_options
def check_tile_type_exists_in_prolog(tile_type, prolog_file_info, error_msg):
    if len(prolog_file_info.get('%s_tile_ids' % tile_type)) < 1:
        error_exit("tile type (%s) not found in prolog file; %s" % (
            tile_type, error_msg))
def get_ngx_info(prefix): ngx_info = dict() if prefix is None: print "try default nginx path:/usr/local/nginx/sbin/nginx" path = "/usr/local/nginx/sbin/nginx" prefix = "/usr/local/nginx" else: ngx_info['prefix'] = prefix ngx_info['ngx_src'] = None path = os.path.join(prefix,"sbin/nginx") try: proc = subprocess.Popen([path,'-V'],stderr=subprocess.PIPE) except Exception as e: print e error_exit('Cannot find nginx! Please use option -p/--prefix to specify the prefix of tengine!') stdout, stderr = proc.communicate() output = stderr.decode('utf-8') try: ngx_versions = re.findall(r'Tengine/(\S*)\s*[\(]nginx/(\S*)\)', output)[0] except IndexError: error_exit("Unknown tengine version! Please use option -p/--path to specify the path of tengine") ngx_info['versions'] = dict() ngx_info['versions']['tengine_version'] = ngx_versions[0] ngx_info['versions']['nginx_version'] = ngx_versions[1] ngx_info['config_arguments'] = [] conf_args = re.search(r'configure arguments: (.*)',output) try: config_args_detail = conf_args.group(1) if config_args_detail.startswith("--prefix="): config_arguments = re.findall(r'^--prefix=[\S]*([\S\s]*)', config_args_detail)[0].split() else: config_arguments = config_args_detail.split() ngx_info['config_arguments'] = config_arguments except Exception as e: print e ngx_info['config_arguments'] = None # get modules' info and store in self.modules ngx_info['modules'] = dict() try: modules = re.search(r'loaded modules:([\s\S]*)', output).group(1) module_list = re.findall(r'(ngx_[\S]*) \(([\S]*)[,\)]',modules) for module in module_list: ngx_info['modules'][str(module[0])] = dict() module_info = ngx_info['modules'][str(module[0])] module_info['static'] = True if module[1] == 'static' else False module_info['wiki_name'] = None # only module installed with ngx3m can we know the version and install time module_info['version'] = None module_info['install_time'] = None except: ngx_info['modules'] = None return ngx_info
def get_trueFIs(ds_stats, res_filename, min_freq, delta, gap=0.0, use_additional_knowledge=False): """ Compute the True Frequent Itemsets using the VC method we present in the paper. The parameter 'use_additional_knowledge' can be used to incorporate additional knowledge about the data generation process. 'gap' controls how close to the optimal solution we ask the CPLEX solver to go. The right way to implement this would be to use a user-defined function in CPLEX. Returns a pair (trueFIs, stats). 'trueFIs' is a dict whose keys are itemsets (frozensets) and values are frequencies. This collection of itemsets contains only TFIs with probability at least 1 - delta. 'stats' is a dict containing various statistics used in computing the collection of itemsets.""" stats = dict() # One may want to play with giving different values for the different error # probabilities, but there isn't really much point in it. lower_delta = 1.0 - math.sqrt(1 - delta) # Compute the maximum frequency of an itemset in the dataset with open(res_filename, 'rt') as FILE: size_line = FILE.readline() try: size_str = size_line.split("(")[1].split(")")[0] except IndexError: error_exit( "Cannot compute size of the dataset: '{}' is not in the recognized format\n" .format(size_line)) try: size = int(size_str) except ValueError: error_exit( "Cannot compute size of the dataset: '{}' is not a number\n". format(size_line.split("(")[1].split(")")[0])) max_freq_line = FILE.readline() if max_freq_line.find("(") > -1: tokens = max_freq_line.split("(") itemset = frozenset(map(int, tokens[0].split())) try: support = int(tokens[1][:-2]) except ValueError: error_exit( "Cannot compute the maximum frequency: '{}' is not a number\n" .format(tokens[1][:-2])) max_freq = support / size else: error_exit( "Cannot compute the maximum frequency: '{}' is not in the recognized format\n" .format(max_freq_line)) # Compute the first epsilon using results from the paper (Riondato and # Upfal 2014) # Incorporate or not 'previous knowledge' about generative process in # computation of the VC-dimension, depending on the option passed on the # command line (eps_vc_dim, eps_shatter, returned) = epsilon.epsilon_dataset(lower_delta, ds_stats, use_additional_knowledge, max_freq) stats['epsilon_1'] = min(eps_vc_dim, eps_shatter) items = ds_stats['items'] items_num = len(items) lengths_dict = ds_stats['lengths'] lengths = sorted(lengths_dict.keys(), reverse=True) # Extract the first (and largest) set of itemsets with frequency at least # min_freq - stats['epsilon_1'] lower_bound_freq = min_freq - stats['epsilon_1'] - (1 / ds_stats['size']) freq_itemsets_1_dict = utils.create_results(res_filename, lower_bound_freq) freq_itemsets_1_set = frozenset(freq_itemsets_1_dict.keys()) freq_itemsets_1_sorted = sorted(freq_itemsets_1_set, key=lambda x: freq_itemsets_1_dict[x]) freq_items_1 = set() for itemset in freq_itemsets_1_set: if len(itemset) == 1: freq_items_1 |= itemset freq_items_1_num = len(freq_items_1) sys.stderr.write("First set of FI's: {} itemsets\n".format( len(freq_itemsets_1_set))) sys.stderr.flush() constr_start_str = "cplex.SparsePair(ind = [" constr_end_str = "], val = vals)" # Compute the "base set" (terrible name), that is the set of # itemsets with frequency < min_freq + epsilon_1 (but greater than min_freq # - stats['epsilon_1']. In the paper we call it \mathcal{G}. 
sys.stderr.write("Creating base set...") sys.stderr.flush() base_set = dict() # We use the maximum frequency in the base set to compute the epislon max_freq_base_set = 0 for itemset in freq_itemsets_1_sorted: if freq_itemsets_1_dict[itemset] < min_freq + stats['epsilon_1']: base_set[itemset] = freq_itemsets_1_dict[itemset] if freq_itemsets_1_dict[itemset] > max_freq_base_set: max_freq_base_set = freq_itemsets_1_dict[itemset] else: break stats['base_set'] = len(base_set) sys.stderr.write("done: {} itemsets\n".format(stats['base_set'])) sys.stderr.flush() # Compute Closed Itemsets. We need them to compute the maximal. sys.stderr.write("Computing closed itemsets...") sys.stderr.flush() closed_itemsets = utils.get_closed_itemsets(base_set) closed_itemsets_len = len(closed_itemsets) sys.stderr.write( "done. Found {} closed itemsets\n".format(closed_itemsets_len)) sys.stderr.flush() # Compute maximal itemsets. We will use them to compute the negative # border. An itemset is maximal frequent if none of its immediate supersets # is frequent. sys.stderr.write("Computing maximal itemsets...") sys.stderr.flush() maximal_itemsets_dict = utils.get_maximal_itemsets(closed_itemsets) maximal_itemsets = list(maximal_itemsets_dict.keys()) stats['maximal_itemsets'] = len(maximal_itemsets) sys.stderr.write("done. Found {} maximal itemsets\n".format( stats['maximal_itemsets'])) sys.stderr.flush() # Compute the negative border sys.stderr.write("Computing negative border...") sys.stderr.flush() negative_border = set() negative_border_items = set() # The idea is to look for "children" of maximal itemsets, and for # "siblings" of maximal itemsets for maximal in maximal_itemsets: for item_to_remove_from_maximal in maximal: reduced_maximal = maximal - frozenset([ item_to_remove_from_maximal, ]) for item in freq_items_1: if item in maximal: continue # Create sibling candidate = reduced_maximal | frozenset([item]) if candidate in freq_itemsets_1_set: continue if candidate in negative_border: continue to_add = True for item_to_remove in candidate: subset = candidate - frozenset([item_to_remove]) if subset not in freq_itemsets_1_set: to_add = False break if to_add: negative_border.add(candidate) negative_border_items |= candidate if not to_add: # if we added the sibling, there's no way we can add the # child candidate2 = maximal | frozenset([item]) # create child if candidate2 in negative_border: continue to_add = True for item_to_remove in candidate2: subset = candidate2 - frozenset([item_to_remove]) if subset not in freq_itemsets_1_set: to_add = False break if to_add: negative_border.add(candidate2) negative_border_items |= candidate # We don't need to add the non-frequent-items because none of them (or # their supersets) will ever be included in the output, so at most we lose # some statistical power, but it's not a problem of avoiding false # positives. # for item in non_freq_items_1: # negative_border.add(frozenset([item])) # negative_border_items.add(item) original_negative_border_len = len(negative_border) sys.stderr.write( "done. Length now: {}\n".format(original_negative_border_len)) sys.stderr.flush() # Add the "base set" to negative_border, so that it becomes a superset of # the "true" negative border (with some caveats about non-frequent single # items and their supersets, see comment above) sys.stderr.write("Adding base set...") sys.stderr.flush() for itemset in base_set: negative_border.add(itemset) negative_border_items |= itemset sys.stderr.write("done. 
Length now: {}\n".format(len(negative_border))) sys.stderr.flush() negative_border = sorted(negative_border, key=len, reverse=True) stats['negative_border'] = len(negative_border) negative_border_items_sorted = sorted(negative_border_items) # Create the graph that we will use to compute the chain constraints. # The nodes are the itemsets in negative_border. There is an edge between # two nodes if one is contained in the other or vice-versa. # Cliques on this graph are chains. sys.stderr.write("Creating graph...") sys.stderr.flush() graph = nx.Graph() graph.add_nodes_from(negative_border) sys.stderr.write("added nodes...adding edges...") sys.stderr.flush() negative_border_items_in_sets_dict = dict() negative_border_itemset_index = 0 itemset_indexes_dict = dict() for first_itemset_index in range(stats['negative_border']): first_itemset = negative_border[first_itemset_index] for second_itemset_index in range(first_itemset_index + 1, stats['negative_border']): second_itemset = negative_border[second_itemset_index] if first_itemset < second_itemset or \ second_itemset < first_itemset: graph.add_edge(first_itemset, second_itemset) for item in first_itemset: if item in negative_border_items_in_sets_dict: negative_border_items_in_sets_dict[item].append( negative_border_itemset_index) else: negative_border_items_in_sets_dict[item] = \ [negative_border_itemset_index, ] itemset_indexes_dict[first_itemset] = negative_border_itemset_index negative_border_itemset_index += 1 sys.stderr.write("done\n") sys.stderr.flush() capacity = freq_items_1_num - 1 if use_additional_knowledge and 2 * ds_stats['maxlen'] < capacity: sys.stderr.write("Lowering capacity={} to {}\n".format( capacity, 2 * ds_stats['maxlen'])) sys.stderr.flush() capacity = 2 * ds_stats['maxlen'] vars_num = stats['negative_border'] + len(negative_border_items) constr_names = [] (tmpfile_handle, tmpfile_name) = tempfile.mkstemp(prefix="cplx", dir=os.environ['PWD'], text=True) os.close(tmpfile_handle) with open(tmpfile_name, 'wt') as cplex_script: cplex_script.write("capacity = {}\n".format(capacity)) cplex_script.write("import cplex, os, sys\n") cplex_script.write("from cplex.exceptions import CplexError\n") cplex_script.write("\n") cplex_script.write("\n") cplex_script.write(" ".join( ("os.environ[\"ILOG_LICENSE_FILE\"] =", "\"/local/projects/cplex/ilm/site.access.ilm\"\n"))) cplex_script.write("vals = [-1.0, 1.0]\n") cplex_script.write("sets_num = {}\n".format(stats['negative_border'])) cplex_script.write("items_num = {}\n".format( len(negative_border_items))) cplex_script.write("vars_num = {}\n".format(vars_num)) cplex_script.write("my_ub = [1.0] * vars_num\n") cplex_script.write( "my_types = \"\".join(\"I\" for i in range(vars_num))\n") cplex_script.write( "my_obj = ([1.0] * sets_num) + ([0.0] * items_num)\n") cplex_script.write(" ".join( ("my_colnames =", "[\"set{0}\".format(i) for i in range(sets_num)] +", "[\"item{0}\".format(j) for j in range(items_num)]\n"))) cplex_script.write("rows = [ ") sys.stderr.write("Writing knapsack constraints...") sys.stderr.flush() constr_num = 0 for item_index in range(len(negative_border_items)): try: for itemset_index in negative_border_items_in_sets_dict[ negative_border_items_sorted[item_index]]: constr_str = "".join( (constr_start_str, "\"set{}\",\"item{}\"".format( itemset_index, item_index), constr_end_str)) cplex_script.write("{},".format(constr_str)) constr_num += 1 name = "s{}i{}".format(item_index, itemset_index) constr_names.append(name) except KeyError: sys.stderr.write(" ".join( 
("item_index={}".format(item_index), "neg_border_items_sorted[item_index]={}\n".format( negative_border_items_sorted[item_index])))) sys.stderr.write("{} in items: {}\n".format( negative_border_items_sorted[item_index], negative_border_items_sorted[item_index] in items)) sys.stderr.write("{} in freq_items_1: {}\n".format( negative_border_items_sorted[item_index], negative_border_items_sorted[item_index] in freq_items_1)) non_freq_items_1 = items - freq_items_1 sys.stderr.write("{} in non_freq_items_1: {}\n".format( negative_border_items_sorted[item_index], negative_border_items_sorted[item_index] in non_freq_items_1)) in_pos_border = False pos_border_itemset = frozenset() for itemset in maximal_itemsets: if negative_border_items_sorted[item_index] in itemset: in_pos_border = True pos_border_itemset = itemset break sys.stderr.write( "{} in maximal_itemsets: {}. Itemset: {}\n".format( negative_border_items_sorted[item_index], in_pos_border, pos_border_itemset)) in_neg_border = False neg_border_itemset = frozenset() for itemset in negative_border: if negative_border_items_sorted[item_index] in itemset: in_neg_border = True neg_border_itemset = itemset break sys.stderr.write( "{} in negative_border: {}. Itemset: {}\n".format( negative_border_items_sorted[item_index], in_neg_border, neg_border_itemset)) sys.exit(1) # Create capacity constraints and write it to script constr_str = "".join( (constr_start_str, ",".join("\"item{}\"".format(j) for j in range(len(negative_border_items))), "], val=[", ",".join("1.0" for j in range(len(negative_border_items))), "])")) cplex_script.write(constr_str) last_tell = cplex_script.tell() cplex_script.write(",") cap_constr_name = "capacity" constr_names.append(cap_constr_name) sys.stderr.write("done\n") sys.stderr.flush() # Create chain constraints and write them to script sys.stderr.write("Writing chain constraints...") sys.stderr.flush() chains_index = 0 for clique in nx.find_cliques(graph): if len(clique) == 1: continue constr_str = "".join( (constr_start_str, ",".join( "\"set{}\"".format(j) for j in map(lambda x: itemset_indexes_dict[x], clique)), "], val=[1.0] * {}".format(len(clique)), ")")) cplex_script.write(constr_str) last_tell = cplex_script.tell() cplex_script.write(",") name = "chain{}".format(chains_index) constr_names.append(name) chains_index += 1 sys.stderr.write("done\n") sys.stderr.flush() sys.stderr.write(" ".join( ("Optimization problem: capacity={}".format(capacity), "vars_num={}".format(vars_num), "negative_border_size={}".format(stats['negative_border']), "negative_border_items_num={}".format(len(negative_border_items)), "constr_num={}".format(constr_num), "chains_index={}\n".format(chains_index)))) sys.stderr.flush() # Go back one character to remove last comma "," cplex_script.seek(last_tell) cplex_script.write("]\n") cplex_script.write("my_rownames = {}\n".format(constr_names)) cplex_script.write("constr_num = {}\n".format(constr_num)) cplex_script.write("chain_constr_num = {}\n".format(chains_index)) cplex_script.write(" ".join( ("my_senses = [\"G\"] * constr_num +", "[\"L\"] + [\"L\"] * chain_constr_num\n"))) cplex_script.write(" ".join( ("my_rhs = [0.0] * constr_num + [capacity] +", "[1.0] * chain_constr_num\n"))) cplex_script.write("\n") cplex_script.write("try:\n") cplex_script.write(" prob = cplex.Cplex()\n") cplex_script.write(" prob.set_error_stream(sys.stderr)\n") cplex_script.write(" prob.set_log_stream(sys.stderr)\n") cplex_script.write(" prob.set_results_stream(sys.stderr)\n") cplex_script.write(" 
prob.set_warning_stream(sys.stderr)\n") # cplex_script.write(" prob.parameters.mip.strategy.file.set(2)\n") cplex_script.write( " prob.parameters.mip.tolerances.mipgap.set({})\n".format(gap)) cplex_script.write( " prob.parameters.timelimit.set({})\n".format(600)) # cplex_script.write(" # prob.parameters.mip.strategy.variableselect.set(3) # strong # branching\n") cplex_script.write( " prob.objective.set_sense(prob.objective.sense.maximize)\n") cplex_script.write(" ".join( (" prob.variables.add(obj = my_obj, ub = my_ub,", "types = my_types, names = my_colnames)\n"))) cplex_script.write(" ".join( (" prob.linear_constraints.add(lin_expr = rows,", "senses = my_senses, rhs = my_rhs, names = my_rownames)\n"))) cplex_script.write(" ".join( (" prob.MIP_starts.add(cplex.SparsePair(", "ind = [i for i in range(vars_num)],", "val = [1.0] * vars_num),", "prob.MIP_starts.effort_level.auto)\n"))) cplex_script.write(" prob.solve()\n") cplex_script.write("".join( (" print (prob.solution.get_status(),", "prob.solution.status[prob.solution.get_status()],", "prob.solution.MIP.get_best_objective(),", "prob.solution.MIP.get_mip_relative_gap())\n"))) cplex_script.write("except CplexError, exc:\n") cplex_script.write(" print exc\n") # Run script, solve optimization problem, extract solution my_environ = os.environ if "ILOG_LICENSE_FILE" not in my_environ: my_environ["ILOG_LICENSE_FILE"] = \ "/local/projects/cplex/ilm/site.access.ilm" try: cplex_output_binary_str = subprocess.check_output( ["python2.6", tmpfile_name], env=my_environ, cwd=os.environ["PWD"]) except subprocess.CalledProcessError as err: os.remove(tmpfile_name) utils.error_exit("CPLEX exited with error code {}: {}\n".format( err.returncode, err.output)) # finally: # os.remove(tmpfile_name) cplex_output = cplex_output_binary_str.decode( locale.getpreferredencoding()) cplex_output_lines = cplex_output.split("\n") cplex_solution_line = cplex_output_lines[-1 if len(cplex_output_lines[-1] ) > 0 else -2] try: cplex_solution = eval(cplex_solution_line) except Exception: utils.error_exit( "Error evaluating the CPLEX solution line: {}\n".format( cplex_solution_line)) sys.stderr.write("cplex_solution={}\n".format(cplex_solution)) sys.stderr.flush() # if cplex_solution[0] not in (1, 101, 102): # utils.error_exit("CPLEX didn't find the optimal solution: {} {} # {}\n".format(cplex_solution[0], cplex_solution[1], cplex_solution[2])) # This is also an upper bound to the size of the true negative border optimal_sol_upp_bound = int( math.floor(cplex_solution[2] * (1 + cplex_solution[3]))) # Compute non-empirical VC-dimension and first candidate to epsilon_2 stats['not_emp_vc_dim'] = int(math.floor( math.log2(optimal_sol_upp_bound))) + 1 if stats['not_emp_vc_dim'] > math.log2(len(negative_border)): sys.stderr.write( "Lowering non_empirical VC-dimension to maximum value\n") stats['not_emp_vc_dim'] = int( math.floor(math.log2(len(negative_border)))) not_emp_epsilon_2 = epsilon.get_eps_vc_dim(lower_delta, ds_stats['size'], stats['not_emp_vc_dim']) sys.stderr.write(" ".join( ("items_num-1={}".format(items_num - 1), "optimal_sol_upp_bound={}".format(optimal_sol_upp_bound), "not_emp_vc_dim={}".format(stats['not_emp_vc_dim']), "not_emp_e2={}\n".format(not_emp_epsilon_2)))) sys.stderr.flush() # Loop to compute empirical VC-dimension using lengths distribution items_num_str_len = len(str(len(negative_border_items) - 1)) longer_equal = 0 for i in range(len(lengths)): cand_len = lengths[i] if cand_len == items_num: continue longer_equal += lengths_dict[cand_len] # No need to 
include tests to check whether cand_len is lower than # 2*ds_stats['maxlen'] if use_additional_knowledge is True: it is # always true given that cand_len <= ds_stats['maxlen'] if cand_len >= len(negative_border_items): cand_len = len(negative_border_items) - 1 # Modify the script to use the new capacity. with open(tmpfile_name, 'r+t') as cplex_script: cplex_script.seek(0) cplex_script.write("capacity = {}\n".format( str(cand_len).ljust(items_num_str_len))) # Run the script, solve optimization problem, extract solution my_environ = os.environ if "ILOG_LICENSE_FILE" not in my_environ: my_environ["ILOG_LICENSE_FILE"] = \ "/local/projects/cplex/ilm/site.access.ilm" try: cplex_output_binary_str = subprocess.check_output( ["python2.6", tmpfile_name], env=my_environ, cwd=os.environ["PWD"]) except subprocess.CalledProcessError as err: os.remove(tmpfile_name) utils.error_exit("CPLEX exited with error code {}: {}\n".format( err.returncode, err.output)) # finally: # os.remove(tmpfile_name) cplex_output = cplex_output_binary_str.decode( locale.getpreferredencoding()) cplex_output_lines = cplex_output.split("\n") cplex_solution_line = cplex_output_lines[ -1 if len(cplex_output_lines[-1]) > 0 else -2] try: cplex_solution = eval(cplex_solution_line) except Exception: utils.error_exit( "Error evaluating the CPLEX solution line: {}\n".format( cplex_solution_line)) sys.stderr.write("{}\n".format(cplex_solution)) # if cplex_solution[0] not in (1, 101, 102): # utils.error_exit("CPLEX didn't find the optimal solution: {} {} # {}\n".format(cplex_solution[0], cplex_solution[1], # cplex_solution[2])) # if cplex_solution[0] == 102: optimal_sol_upp_bound_emp = int( math.floor(cplex_solution[2] * (1 + cplex_solution[3]))) # else: # optimal_sol_upp_bound_emp = cplex_solution[0] stats['emp_vc_dim'] = int( math.floor(math.log2(optimal_sol_upp_bound_emp))) + 1 if stats['emp_vc_dim'] > math.log2(len(negative_border)): sys.stderr.write("Lowering VC-dimension to maximum value\n") stats['emp_vc_dim'] = int( math.floor(math.log2(len(negative_border)))) sys.stderr.write(" ".join( ("cand_len={}".format(cand_len), "longer_equal={}".format(longer_equal), "emp_vc_dim={}".format(stats['emp_vc_dim']), "optimal_sol_upp_bound_emp={}\n".format(optimal_sol_upp_bound_emp) ))) sys.stderr.flush() # If stopping condition is satisfied, exit. if stats['emp_vc_dim'] <= longer_equal: break # sys.stderr.write("{} {} {}\n".format(vc_dim_cand, vc_dim_cand2, # vc_dim_cand3)) os.remove(tmpfile_name) # Compute the bound to the shatter coefficient, which we use to compute # epsilon bound = min((math.log(optimal_sol_upp_bound), stats['emp_vc_dim'] * math.log(math.e * ds_stats['size'] / stats['emp_vc_dim']))) sys.stderr.write( "bound to shatter coeff: log_of_range_size={}, log_using_vc_dim={}\n". format( math.log(optimal_sol_upp_bound), stats['emp_vc_dim'] * math.log(math.e * ds_stats['size'] / stats['emp_vc_dim']))) sys.stderr.flush() # The following assert is to check that we are better than another bound to # the shatter coefficient which used the number of closed itemsets in the # base set and the size of the negative border of the base set. # Intuitively, the assert should not fail. 
=) assert (optimal_sol_upp_bound <= original_negative_border_len + closed_itemsets_len) # Compute second candidate to epsilon_2 emp_epsilon_2 = epsilon.get_eps_shattercoeff_bound(lower_delta, ds_stats['size'], bound, max_freq_base_set) sys.stderr.write( "cand_len={} opt_sol_upp_bound_emp={} emp_vc_dim={} bound={} max_freq_base_set={} emp_e2={}\n" .format(cand_len, optimal_sol_upp_bound_emp, stats['emp_vc_dim'], bound, max_freq_base_set, emp_epsilon_2)) sys.stderr.flush() sys.stderr.write("not_emp_e2={}, emp_e2={}\n".format( not_emp_epsilon_2, emp_epsilon_2)) sys.stderr.flush() stats['epsilon_2'] = min(emp_epsilon_2, not_emp_epsilon_2) # Extract TFIs using epsilon_2 sys.stderr.write("Extracting TFIs using epsilon_2...") sys.stderr.flush() trueFIs = dict() for itemset in reversed(freq_itemsets_1_sorted): if freq_itemsets_1_dict[itemset] >= min_freq + stats['epsilon_2']: trueFIs[itemset] = freq_itemsets_1_dict[itemset] else: break sys.stderr.write("done ({} TFIS)\n".format(len(trueFIs))) sys.stderr.flush() return (trueFIs, stats)
def generate_video(self, audio, image): ''' Encodes a video file from our audio and image input files. ''' # Check to see if our files exist at all. if not (os.path.exists(audio) and os.path.exists(image)): error_exit('please specify a valid audio and image file') in_image_ext = os.path.splitext(image)[1] in_audio_ext = os.path.splitext(audio)[1] # Check our MP3/OGG/FLAC/etc file and get its duration. probe_cmd = [self.settings['path_ffprobe'], audio] try: probe_out = subprocess.check_output( probe_cmd, stderr=subprocess.STDOUT ) if self.settings['verbose']: print(probe_out) except: error_exit('''couldn't probe the audio file \ (ffprobe might not be available)''') # Try to extract some metadata from the file using Mutagen. try: metadata = mutagen.File(audio) except AttributeError: metadata = [] # Save a human-readable version of the metadata in the object. # Keep the original Mutagen output around too. self.settings['metadata'] = {} self.settings['orig_metadata'] = metadata if metadata is not None: for tag in metadata: item = metadata[tag] # We join the item in case it's still a list, as in the case # of Vorbis. if isinstance(item, (list, tuple)): item = ''.join(item) self.settings['metadata'][self.tunetags.tag_lookup(tag)] = \ str(item) # Lift the actual track duration string out of the output. duration = re.findall('Duration: (.+?),', probe_out) # If we get valid output, parse the duration and get a seconds value. # Otherwise, stop the script. if len(duration): duration = duration[0] else: error_exit('''couldn't parse ffprobe's output. Try again with \ -v (--verbose) to see what went wrong.''') # Turn the string into a datetime format. try: audio_info = datetime.strptime(duration, '%H:%M:%S.%f') delta = timedelta( hours=audio_info.hour, minutes=audio_info.minute, seconds=audio_info.second, microseconds=audio_info.microsecond ) except ValueError: error_exit('''encountered an error trying to determine the \ duration of the audio file. It could be in an unrecognized format, or \ longer than 24 hours. (Duration: %s, exception: %s)''' % ( duration, sys.exc_info()[0] )) print('Using image file `%s\', size: %s.' % ( image, os.path.getsize(image) )) print('Using audio file `%s\', size: %s, duration: %s.' % ( audio, os.path.getsize(audio), duration )) if self.settings['metadata'] == []: print("Couldn't extract audio file tags. Continuing.") else: print('Extracted %d tag(s) from the audio file.' % len( self.settings['metadata'] )) print('Encoding video file...') # Now call ffmpeg and produce the video. ffmpeg_cmd = [ self.settings['path_ffmpeg'], # loop the video (picture) for the movie's duration '-loop', '1', # a framerate of 1fps (anything lower won't be accepted by Youtube) '-framerate', '1:1', # one input file is the picture '-i', image, # automatically overwrite on duplicate '-y', ] # Add the audio file. if in_audio_ext == '.flac': # mp4 doesn't take flac very well, so we'll convert it. ffmpeg_cmd.extend([ # one input file is the audio '-i', audio, # for compatibility with various builds, we'll use MP3 '-c:a', 'libmp3lame', # high quality CBR is good enough '-b:a', '320k', ]) else: ffmpeg_cmd.extend([ # one input file is the audio '-i', audio, # only copy the audio, don't re-encode it '-c:a', 'copy', ]) # Add the video encoding options. 
ffmpeg_cmd.extend([ # use x264 as the video encoder '-c:v', 'libx264', # duration of the video '-t', str(delta.total_seconds()), # 4:4:4 chroma subsampling (best quality) '-pix_fmt', 'yuv444p', # as fast as possible, at cost of filesize # (uploading likely costs less time) '-preset', 'ultrafast', # lossless quality '-qp', '0', # output self.settings['path_output'] ]) try: probe_out = subprocess.check_output( ffmpeg_cmd, stderr=subprocess.STDOUT ) if self.settings['verbose']: print(probe_out) except: error_exit('''encountered an error trying to generate the video. \ Try again with -v (--verbose) to see what went wrong. \ (Exception: %s)''' % sys.exc_info()[0]) print('Successfully generated the file `%s\'.' % self.settings['path_output'])
def get_trueFIs(exp_res_filename, eval_res_filename, min_freq, delta, gap=0.0, first_epsilon=1.0, vcdim=-1): """ Compute the True Frequent Itemsets using the 'holdout-VC' method. TODO Add more details.""" stats = dict() with open(exp_res_filename) as FILE: size_line = FILE.readline() try: size_str = size_line.split("(")[1].split(")")[0] except IndexError: utils.error_exit(" ".join( ("Cannot compute size of the explore dataset:", "'{}' is not in the recognized format\n".format(size_line)))) try: stats['exp_size'] = int(size_str) except ValueError: utils.error_exit(" ".join( ("Cannot compute size of the explore dataset:", "{} is not a number\n".format(size_str)))) with open(eval_res_filename) as FILE: size_line = FILE.readline() try: size_str = size_line.split("(")[1].split(")")[0] except IndexError: utils.error_exit(" ".join( ("Cannot compute size of the eval dataset:", "'{}' is not in the recognized format\n".format(size_line)))) try: stats['eval_size'] = int(size_str) except ValueError: utils.error_exit(" ".join( ("Cannot compute size of the eval dataset:", "'{}' is not a number\n".format(size_str)))) stats['orig_size'] = stats['exp_size'] + stats['eval_size'] exp_res = utils.create_results(exp_res_filename, min_freq) stats['exp_res'] = len(exp_res) exp_res_set = set(exp_res.keys()) eval_res = utils.create_results(eval_res_filename, min_freq) stats['eval_res'] = len(eval_res) eval_res_set = set(eval_res.keys()) intersection = exp_res_set & eval_res_set stats['holdout_intersection'] = len(intersection) stats['holdout_false_negatives'] = len(exp_res_set - eval_res_set) stats['holdout_false_positives'] = len(eval_res_set - exp_res_set) stats['holdout_jaccard'] = len(intersection) / \ len(exp_res_set | eval_res_set) # One may want to play with giving different values for the different error # probabilities, but there isn't really much point in it. lower_delta = 1.0 - math.sqrt(1 - delta) stats['epsilon_1'] = first_epsilon sys.stderr.write("Computing candidates...") sys.stderr.flush() freq_bound = min_freq + stats['epsilon_1'] candidates = [] candidates_items = set() trueFIs = dict() for itemset in exp_res: if exp_res[itemset] < freq_bound: candidates.append(itemset) candidates_items |= itemset else: # Add itemsets with frequency at last freq_bound to the TFIs trueFIs[itemset] = exp_res[itemset] sys.stderr.write("done: {} candidates ({} items)\n".format( len(candidates), len(candidates_items))) sys.stderr.flush() if len(candidates ) > 0 and vcdim > -1 and len(candidates_items) - 1 > vcdim: sys.stderr.write("Using additional knowledge\n") candidates_items_sorted = sorted(candidates_items) candidates_items_in_sets_dict = dict() candidates_itemset_index = 0 itemset_indexes_dict = dict() for first_itemset_index in range(len(candidates)): first_itemset = candidates[first_itemset_index] for item in first_itemset: if item in candidates_items_in_sets_dict: candidates_items_in_sets_dict[item].append( candidates_itemset_index) else: candidates_items_in_sets_dict[item] = \ [candidates_itemset_index, ] itemset_indexes_dict[first_itemset] = candidates_itemset_index candidates_itemset_index += 1 # Compute an upper-bound to the VC-dimension of the set of candidates. 
constr_start_str = "cplex.SparsePair(ind = [" constr_end_str = "], val = vals)" vars_num = len(candidates) + len(candidates_items) constr_names = [] capacity = vcdim (tmpfile_handle, tmpfile_name) = tempfile.mkstemp(prefix="cplx", dir=os.environ['PWD'], text=True) os.close(tmpfile_handle) with open(tmpfile_name, 'wt') as cplex_script: cplex_script.write("capacity = {}\n".format(capacity)) cplex_script.write("import cplex, os, sys\n") cplex_script.write("from cplex.exceptions import CplexError\n") cplex_script.write("\n") cplex_script.write("\n") cplex_script.write(" ".join( ("os.environ[\"ILOG_LICENSE_FILE\"] =" "\"/local/projects/cplex/ilm/site.access.ilm\"\n"))) cplex_script.write("vals = [-1.0, 1.0]\n") cplex_script.write("sets_num = {}\n".format(len(candidates))) cplex_script.write("items_num = {}\n".format( len(candidates_items))) cplex_script.write("vars_num = {}\n".format(vars_num)) cplex_script.write("my_ub = [1.0] * vars_num\n") cplex_script.write( "my_types = \"\".join(\"I\" for i in range(vars_num))\n") cplex_script.write( "my_obj = ([1.0] * sets_num) + ([0.0] * items_num)\n") cplex_script.write(" ".join( ("my_colnames =" "[\"set{0}\".format(i) for i in range(sets_num)]", "+ [\"item{0}\".format(j) for j in range(items_num)]\n"))) cplex_script.write("rows = [ ") sys.stderr.write("Writing knapsack constraints...") sys.stderr.flush() constr_num = 0 for item_index in range(len(candidates_items)): try: for itemset_index in \ candidates_items_in_sets_dict[ candidates_items_sorted[item_index]]: constr_str = "".join( (constr_start_str, "\"set{}\",\"item{}\"".format( itemset_index, item_index), constr_end_str)) cplex_script.write("{},".format(constr_str)) constr_num += 1 name = "s{}i{}".format(item_index, itemset_index) constr_names.append(name) except KeyError: sys.stderr.write(" ".join( ("item_index={}".format(item_index), "candidates_items_sorted[item_index]={}\n".format( candidates_items_sorted[item_index])))) in_candidates = False candidates_itemset = frozenset() for itemset in candidates: if candidates_items_sorted[item_index] in itemset: in_candidates = True candidates_itemset = itemset break sys.stderr.write( "{} in negative_border: {}. 
Itemset: {}\n".format( candidates_items_sorted[item_index], in_candidates, candidates_itemset)) sys.exit(1) # Create capacity constraints and write it to script constr_str = "".join( (constr_start_str, ",".join("\"item{}\"".format(j) for j in range(len(candidates_items))), "], val=[", ",".join("1.0" for j in range(len(candidates_items))), "])")) cplex_script.write(constr_str) cplex_script.write("]\n") cap_constr_name = "capacity" constr_names.append(cap_constr_name) sys.stderr.write("done\n") sys.stderr.flush() sys.stderr.write(" ".join( ("Optimization problem: capacity={}".format(capacity), "vars_num={}".format(vars_num), "candidates={}".format(len(candidates)), "candidates_items_num={}".format(len(candidates_items)), "constr_num={}\n".format(constr_num)))) sys.stderr.flush() cplex_script.write("my_rownames = {}\n".format(constr_names)) cplex_script.write("constr_num = {}\n".format(constr_num)) cplex_script.write("my_senses = [\"G\"] * constr_num + [\"L\"]\n") cplex_script.write("my_rhs = [0.0] * constr_num + [capacity]\n") cplex_script.write("\n") cplex_script.write("try:\n") cplex_script.write(" prob = cplex.Cplex()\n") cplex_script.write(" prob.set_error_stream(sys.stderr)\n") cplex_script.write(" prob.set_log_stream(sys.stderr)\n") cplex_script.write(" prob.set_results_stream(sys.stderr)\n") cplex_script.write(" prob.set_warning_stream(sys.stderr)\n") # cplex_script.write(" # prob.parameters.mip.strategy.file.set(2)\n") cplex_script.write( " prob.parameters.mip.tolerances.mipgap.set({})\n".format( gap)) cplex_script.write( " prob.parameters.timelimit.set({})\n".format(600)) # cplex_script.write(" # prob.parameters.mip.strategy.variableselect.set(3) # strong # branching\n") cplex_script.write("".join((" prob.objective.set_sense(", "prob.objective.sense.maximize)\n"))) cplex_script.write(" ".join( (" prob.variables.add(obj = my_obj, ub = my_ub,", "types = my_types, names = my_colnames)\n"))) cplex_script.write(" ".join( (" prob.linear_constraints.add(lin_expr = rows,", "senses = my_senses, rhs = my_rhs,", "names = my_rownames)\n"))) cplex_script.write(" ".join( (" prob.MIP_starts.add(cplex.SparsePair(ind =", "[i for i in range(vars_num)], val = [1.0] * vars_num),", "prob.MIP_starts.effort_level.auto)\n"))) cplex_script.write(" prob.solve()\n") cplex_script.write(",".join( (" print (prob.solution.get_status()", "prob.solution.status[prob.solution.get_status()]", "prob.solution.MIP.get_best_objective()" "prob.solution.MIP.get_mip_relative_gap())\n"))) cplex_script.write("except CplexError, exc:\n") cplex_script.write(" print exc\n") # Run script, solve optimization problem, extract solution my_environ = os.environ if "ILOG_LICENSE_FILE" not in my_environ: my_environ["ILOG_LICENSE_FILE"] = \ "/local/projects/cplex/ilm/site.access.ilm" try: cplex_output_binary_str = subprocess.check_output( ["python2.6", tmpfile_name], env=my_environ, cwd=os.environ["PWD"]) except subprocess.CalledProcessError as err: os.remove(tmpfile_name) utils.error_exit("CPLEX exited with error code {}: {}\n".format( err.returncode, err.output)) # finally: # os.remove(tmpfile_name) cplex_output = cplex_output_binary_str.decode( locale.getpreferredencoding()) cplex_output_lines = cplex_output.split("\n") cplex_solution_line = cplex_output_lines[ -1 if len(cplex_output_lines[-1]) > 0 else -2] try: cplex_solution = eval(cplex_solution_line) except Exception: utils.error_exit( "Error evaluating the CPLEX solution line: {}\n".format( cplex_solution_line)) sys.stderr.write("cplex_solution={}\n".format(cplex_solution)) 
sys.stderr.flush() # if cplex_solution[0] not in (1, 101, 102): # utils.error_exit("CPLEX didn't find the optimal solution: {} {} # {}\n".format(cplex_solution[0], cplex_solution[1], # cplex_solution[2])) optimal_sol_upp_bound = int( math.floor(cplex_solution[2] * (1 + cplex_solution[3]))) stats['vcdim'] = int(math.floor(math.log2(optimal_sol_upp_bound))) + 1 if stats['vcdim'] > math.log2(len(candidates)): sys.stderr.write("Lowering VC-dimension to maximum value\n") sys.stderr.flush() stats['vcdim'] = int(math.floor(math.log2(len(candidates)))) stats['epsilon_2_vc'] = epsilon.get_eps_vc_dim(lower_delta, stats['orig_size'], stats['vcdim']) elif len(candidates ) > 0 and vcdim > -1 and len(candidates_items) - 1 <= vcdim: sys.stderr.write("Additional knowledge is useless\n") sys.stderr.flush() stats['vcdim'] = int(math.floor(math.log2(len(candidates)))) stats['epsilon_2_vc'] = epsilon.get_eps_vc_dim(lower_delta, stats['orig_size'], stats['vcdim']) elif len(candidates) > 0 and vcdim == -1: sys.stderr.write("Not using additional knowledge\n") sys.stderr.flush() stats['vcdim'] = int(math.floor(math.log2(len(candidates)))) stats['epsilon_2_vc'] = epsilon.get_eps_vc_dim(lower_delta, stats['orig_size'], stats['vcdim']) else: sys.stderr.write("There are no candidates\n") sys.stderr.flush() stats['vcdim'] = 0 stats['epsilon_2_vc'] = 0 # Loop to compute empirical VC-dimension using lengths distribution capacity_str_len = len(str(capacity)) longer_equal = 0 lengths_dict = ds_stats['lengths'] lengths = sorted(lengths_dict.keys(), reverse=True) start_len_idx = 0 while start_len_idx < len(lengths): if lengths[start_len_idx] > len(candidates_items) - 1: longer_equal += lengths_dict[start_len_idx] start_len_idx += 1 else: break for i in range(start_len_idx, len(lengths)): cand_len = lengths[i] longer_equal += lengths_dict[cand_len] # Modify the script to use the new capacity. 
with open(tmpfile_name, 'r+t') as cplex_script: cplex_script.seek(0) cplex_script.write("capacity = {}\n".format( str(cand_len).ljust(capacity_str_len))) # Run the script, solve optimization problem, extract solution my_environ = os.environ if "ILOG_LICENSE_FILE" not in my_environ: my_environ["ILOG_LICENSE_FILE"] = \ "/local/projects/cplex/ilm/site.access.ilm" try: cplex_output_binary_str = subprocess.check_output( ["python2.6", tmpfile_name], env=my_environ, cwd=os.environ["PWD"]) except subprocess.CalledProcessError as err: os.remove(tmpfile_name) utils.error_exit("CPLEX exited with error code {}: {}\n".format( err.returncode, err.output)) # finally: # os.remove(tmpfile_name) cplex_output = cplex_output_binary_str.decode( locale.getpreferredencoding()) cplex_output_lines = cplex_output.split("\n") cplex_solution_line = cplex_output_lines[ -1 if len(cplex_output_lines[-1]) > 0 else -2] try: cplex_solution = eval(cplex_solution_line) except Exception: utils.error_exit( "Error evaluating the CPLEX solution line: {}\n".format( cplex_solution_line)) sys.stderr.write("{}\n".format(cplex_solution)) # if cplex_solution[0] not in (1, 101, 102): # utils.error_exit("CPLEX didn't find the optimal solution: {} {} # {}\n".format(cplex_solution[0], cplex_solution[1], # cplex_solution[2])) # if cplex_solution[0] == 102: optimal_sol_upp_bound_emp = int( math.floor(cplex_solution[2] * (1 + cplex_solution[3]))) # else: # optimal_sol_upp_bound_emp = cplex_solution[0] stats['emp_vc_dim'] = int( math.floor(math.log2(optimal_sol_upp_bound_emp))) + 1 if stats['emp_vc_dim'] > math.log2(len(negative_border)): sys.stderr.write("Lowering VC-dimension to maximum value\n") stats['emp_vc_dim'] = int( math.floor(math.log2(len(negative_border)))) sys.stderr.write(" ".join( ("cand_len={}".format(cand_len), "longer_equal={}".format(longer_equal), "emp_vc_dim={}".format(stats['emp_vc_dim']), "optimal_sol_upp_bound_emp={}\n".format(optimal_sol_upp_bound_emp) ))) sys.stderr.flush() # If stopping condition is satisfied, exit. if stats['emp_vc_dim'] <= longer_equal: break os.remove(tmpfile_name) # Compute the bound to the shatter coefficient, which we use to compute # epsilon bound = min((math.log(len(candidates)), stats['emp_vc_dim'] * math.log(math.e * stats['eval_size'] / stats['emp_vc_dim']))) # Compute second candidate to epsilon_2 emp_epsilon_2 = epsilon.get_eps_shattercoeff_bound(lower_delta, stats['eval_size'], bound, max_freq_base_set) sys.stderr.write( "cand_len={} opt_sol_upp_bound_emp={} emp_vc_dim={} bound={} max_freq_base_set={} emp_e2={}\n" .format(cand_len, optimal_sol_upp_bound_emp, stats['emp_vc_dim'], bound, max_freq_base_set, emp_epsilon_2)) sys.stderr.flush() sys.stderr.write("not_emp_e2={}, emp_e2={}\n".format( stats['epsilon_2_vc'], emp_epsilon_2)) sys.stderr.flush() stats['epsilon_2'] = min(emp_epsilon_2, stats['epsilon_2_vc']) if len(candidates) > 0: sys.stderr.write("Computing the candidates that are TFIs...") sys.stderr.flush() freq_bound = min_freq + stats['epsilon_2'] eval_res_itemsets = frozenset(eval_res.keys()) for itemset in sorted(frozenset(candidates) & eval_res_itemsets, key=lambda x: eval_res[x], reverse=True): if eval_res[itemset] >= freq_bound: trueFIs[itemset] = eval_res[itemset] sys.stderr.write("done\n") sys.stderr.flush() return (trueFIs, stats)
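# Illustrative sketch (not part of the original script): the empirical
# VC-dimension and shatter-coefficient bound computed above, shown with
# made-up numbers so the arithmetic is easy to follow. The values of
# optimal_sol_upp_bound_emp, the candidate count, and eval_size below are
# assumptions for this example only.
import math

optimal_sol_upp_bound_emp = 12  # assumed optimum of the knapsack relaxation
emp_vc_dim = int(math.floor(math.log2(optimal_sol_upp_bound_emp))) + 1  # = 4
num_candidates = 1000           # assumed number of candidate itemsets
eval_size = 50000               # assumed number of transactions in the eval part
bound = min(math.log(num_candidates),
            emp_vc_dim * math.log(math.e * eval_size / emp_vc_dim))
print(emp_vc_dim, bound)        # 4, ~6.9 (ln(1000) is the smaller term here)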
class Tune2Tube(object): def __init__(self): self.settings = { # ffmpeg is a dependency for this script. ffprobe should be # installed along with ffmpeg. 'path_ffmpeg': 'ffmpeg', 'path_ffprobe': 'ffprobe', # Temporary output filename. 'path_output': 'tmp.mp4', # Version number. 't2t_version': '0.1', # Whether to display ffmpeg/ffprobe output. 'verbose': False, # Whether to only generate the video file without uploading it. 'generate_only': False, # Whether to forego the usage of stored oauth2 tokens. # If set to True, you will need to authenticate using your # browser each time you use the script. 'no_stored_auth': False, # Default title to use in case the user's own title is # an empty string. 'default_title': '(Empty title)', # Default variables to use for the dynamically generated title. 'default_title_vars': 'artist,title', # Whether to use the dynamically generated title # from the file's metadata. 'dynamic_title': True, 'title': None, 'title_vars': None } # Explicitly tell the underlying HTTP transport library not to retry, # since we are handling retry logic ourselves. httplib2.RETRIES = 1 # Maximum number of times to retry before giving up. self.max_retries = 10 # Always retry when these exceptions are raised. self.retriable_exceptions = (httplib2.HttpLib2Error, IOError, httplib.NotConnected, httplib.IncompleteRead, httplib.ImproperConnectionState, httplib.CannotSendRequest, httplib.CannotSendHeader, httplib.ResponseNotReady, httplib.BadStatusLine) # Always retry when an apiclient.errors.HttpError with one of these # status codes is raised. self.retriable_status_codes = [500, 502, 503, 504] # This OAuth 2.0 access scope allows an application to upload files to # the authenticated user's YouTube channel, but doesn't allow other # types of access. self.youtube_base = 'https://www.googleapis.com' self.youtube_upload_scope = self.youtube_base + '/auth/youtube.upload' self.youtube_api_service_name = 'youtube' self.youtube_api_version = 'v3' # We can set our uploaded video to one of these statuses. self.valid_privacy_statuses = ('public', 'private', 'unlisted') # This variable defines a message to display if # the client_secrets_file is missing. self.missing_client_secrets_message = ''' %s: Error: Please configure OAuth 2.0. To make this script run you will need to populate the client_secrets.json file found at: %s with information from the Developers Console, which can be accessed through <https://console.developers.google.com/>. See the README.md file for more details. ''' # Set up our command line argument parser. # The argparser is initialized in oauth2client/tools.py. We're just # adding our own arguments to the ones already defined there. argparser.description = '''Generates a video from an image and audio \ file and uploads it to Youtube.''' argparser.epilog = '''A Youtube Data API client key is required to \ use this script, as well as ffmpeg. For help on setting up these \ dependencies, see this project\'s Github page \ <http://github.com/msikma/tune2tube/> or the included README.md file.''' argparser.add_help = True # Manually add a help argument, # as it is turned off in oauth2client/tools.py. 
argparser.add_argument('--no_stored_auth', action='store_true', help='Forego using stored oauth2 tokens.') argparser.add_argument('audio_file', help='Audio file (MP3, OGG, FLAC, etc).') argparser.add_argument('image_file', help='Image file (PNG, JPG, etc).') argparser.add_argument( '--output', help='''Save the output video (.MP4) to a file rather than \ uploading it to Youtube.''') argparser.add_argument('--cs_json', help='''Path to the client secrets json file \ (default: client_secrets.json).''', default='client_secrets.json') argparser.add_argument( '--privacy', choices=self.valid_privacy_statuses, help='Privacy status of the video (default: unlisted).', default='unlisted') argparser.add_argument( '--category', default='10', help='''Numeric video category (see the Github wiki for a list; \ the default is 10, Music).''') argparser.add_argument( '--keywords', help='Comma-separated list of video keywords/tags.', default='') mxgroup = argparser.add_mutually_exclusive_group() mxgroup.add_argument( '--title', help='''Video title string (default: \'%s\'). If neither --title \ nor --title_vars is specified, --title_vars will be used with its default \ value, unless this would result in \ an empty title.''' % self.settings['default_title']) mxgroup.add_argument( '--title_vars', nargs='?', help='''Comma-separated list of metadata variables to use as \ the video title (default: %s).''' % self.settings['default_title_vars']) argparser.add_argument( '--title_sep', help='''Separator for the title variables (default: \' - \', \ yielding e.g. \'Artist - Title\'). Ignored if \ using --title_str.''', default=' - ') argparser.add_argument( '--description', nargs='?', help='Video description string (default: empty string).', default='') argparser.add_argument( '--add_metadata', action='store_true', help='''Adds a list of audio file metadata to the \ description (default: True).''', default=True) argparser.add_argument('-V', '--version', action='version', version='%(prog)s ' + self.settings['t2t_version'], help='Show version number and exit.') mxgroup = argparser.add_mutually_exclusive_group() mxgroup.add_argument( '-v', '--verbose', action='store_true', help='Verbose mode (display ffmpeg/ffprobe output).') mxgroup.add_argument('-q', '--quiet', action='store_true', help='Quiet mode.') argparser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help='Show this help message and exit.') self.tunetags = TuneTags() def get_authenticated_service(self, args): ''' Get authenticated and cache the result. ''' flow = flow_from_clientsecrets( self.settings['client_secrets_file'], scope=self.youtube_upload_scope, message=self.missing_client_secrets_message % ('tune2tube.py', os.path.abspath( os.path.join(os.path.dirname(__file__), self.settings['client_secrets_file'])))) storage = Storage('%s-oauth2.json' % 'tune2tube.py') credentials = storage.get() if credentials is None or credentials.invalid \ or self.settings['no_stored_auth']: credentials = run_flow(flow, storage, args) return build(self.youtube_api_service_name, self.youtube_api_version, http=credentials.authorize(httplib2.Http())) def initialize_upload(self, youtube, args, upfile): ''' Begin a resumable video upload. ''' tags = None if self.settings['keywords']: tags = self.settings['keywords'].split(',') # If we need to generate a dynamic title, do so now. 
if self.settings['dynamic_title']: title_vars = self.settings['title_vars'].split(',') items = [ self.settings['metadata'][n] for n in title_vars if n in self.settings['metadata'] ] title = self.settings['title_sep'].join(items) else: title = self.settings['title'] if title == '': title = '(no title)' # Add the metadata tags to the description if needed. description = self.settings['description'].strip() if self.settings['add_metadata']: if description != '': description += '\n' # Sort the list of metadata, so that items with linebreaks go last. metalist = [{ key: self.settings['metadata'][key] } for key in self.settings['metadata']] metalist = sorted(metalist, key=lambda x: '\n' in list(x.values())[0]) for tag in metalist: for key in tag: if "APIC" in key: continue value = tag[key] nice_key = self.tunetags.tag_lookup(key, True) if '\n' in value: description += '\n----\n%s: %s\n' % (nice_key, value) else: description += '\n%s: %s' % (nice_key, value) body = { 'snippet': { 'title': title, 'description': description, 'tags': tags, 'categoryId': self.settings['category'] }, 'status': { 'privacyStatus': self.settings['privacy'] } } # Call the API's videos.insert method to create and upload the video. insert_request = youtube.videos().insert(part=','.join(body.keys()), body=body, media_body=MediaFileUpload( upfile, chunksize=-1, resumable=True)) filesize = os.path.getsize(upfile) print('Uploading file... (filesize: %s)' % bytes_to_human(filesize)) self.resumable_upload(insert_request) def resumable_upload(self, insert_request): ''' This method implements an exponential backoff strategy to resume a failed upload. ''' response = None error = None retry = 0 while response is None: try: status, response = insert_request.next_chunk() if 'id' in response: print('''Video ID `%s' was successfully uploaded. \ Its visibility is set to `%s'.''' % (response['id'], self.settings['privacy'])) print('''URL of the newly uploaded video: \ <https://www.youtube.com/watch?v=%s>''' % response['id']) print('''It may take some time for the video to \ finish processing; typically 1-10 minutes.''') else: error_exit('''The upload failed with an unexpected \ response: %s''' % response) except HttpError, e: if e.resp.status in self.retriable_status_codes: error = '''A retriable HTTP error %d occurred:\n%s''' % ( e.resp.status, e.content) else: raise except self.retriable_exceptions, e: error = 'A retriable error occurred: %s' % e if error is not None: print(error) retry += 1 if retry > self.max_retries: error_exit('''Too many upload errors. No longer \ attempting to retry.''') max_sleep = 2**retry sleep_seconds = random.random() * max_sleep print('''Sleeping %f seconds and then \ retrying...''' % sleep_seconds) time.sleep(sleep_seconds)
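# Illustrative sketch (a standalone demo, not part of the class above): the
# backoff schedule that resumable_upload follows. Each retry sleeps a random
# amount between 0 and 2**retry seconds, so the expected wait roughly doubles
# per attempt, up to max_retries attempts.
import random

max_retries = 10
for retry in range(1, max_retries + 1):
    max_sleep = 2 ** retry
    sleep_seconds = random.random() * max_sleep
    print('retry %d: sleeping up to %d s (this time %.2f s)'
          % (retry, max_sleep, sleep_seconds))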
def main(): # Verify arguments if len(sys.argv) != 7: utils.error_exit( "Usage: {} first_epsilon delta min_freq pvalue_mode exploreres evalres\n" .format(os.path.basename(sys.argv[0]))) exp_res_filename = sys.argv[5] if not os.path.isfile(exp_res_filename): utils.error_exit( "{} does not exist, or is not a file\n".format(exp_res_filename)) eval_res_filename = sys.argv[6] if not os.path.isfile(eval_res_filename): utils.error_exit( "{} does not exist, or is not a file\n".format(eval_res_filename)) pvalue_mode = sys.argv[4].upper() if pvalue_mode != "C" and pvalue_mode != "E" and pvalue_mode != "W": utils.error_exit( "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format( pvalue_mode)) try: first_epsilon = float(sys.argv[1]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[1])) try: delta = float(sys.argv[2]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[2])) try: min_freq = float(sys.argv[3]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[3])) (trueFIs, stats) = get_trueFIs(exp_res_filename, eval_res_filename, min_freq, delta, pvalue_mode, first_epsilon) utils.print_itemsets(trueFIs, stats['orig_size']) sys.stderr.write( "exp_res_file={},eval_res_file={},pvalue_mode={},d={},min_freq={},trueFIs={}\n" .format(os.path.basename(exp_res_filename), os.path.basename(eval_res_filename), pvalue_mode, delta, min_freq, len(trueFIs))) sys.stderr.write("orig_size={},exp_size={},eval_size={}\n".format( stats['orig_size'], stats['exp_size'], stats['eval_size'])) sys.stderr.write("exp_res={},exp_res_filtered={},eval_res={}\n".format( stats['exp_res'], stats['exp_res_filtered'], stats['eval_res'])) sys.stderr.write("filter_epsilon={},tfis_from_exp={}\n".format( stats['filter_epsilon'], stats['tfis_from_exp'])) sys.stderr.write( "holdout_intersection={},holdout_false_negatives={}\n".format( stats['holdout_intersection'], stats['holdout_false_negatives'])) sys.stderr.write("critical_value={},removed={},epsilon={}\n".format( stats['critical_value'], stats['removed'], stats['epsilon'])) sys.stderr.write( "exp_res_file,eval_res_file,pvalue_mode,delta,min_freq,trueFIs,orig_size,exp_size,eval_size,exp_res,exp_res_filtered,eval_res,filter_epsilon,tfis_from_exp,holdout_intersection,holdout_false_negatives,critical_value,removed,epsilon\n" ) sys.stderr.write("{}\n".format(",".join( (str(i) for i in (os.path.basename(exp_res_filename), os.path.basename(eval_res_filename), pvalue_mode, delta, min_freq, len(trueFIs), stats['orig_size'], stats['exp_size'], stats['eval_size'], stats['exp_res'], stats['exp_res_filtered'], stats['eval_res'], stats['filter_epsilon'], stats['tfis_from_exp'], stats['holdout_intersection'], stats['holdout_false_negatives'], stats['critical_value'], stats['removed'], stats['epsilon'])))))
def main(): # Verify arguments if len(sys.argv) != 7: utils.error_exit( " ".join(( "Usage: {}".format(os.path.basename(sys.argv[0])), "use_additional_knowledge={{0|1}} delta min_freq mode={{c|e}}", "dataset results_filename\n"))) dataset = sys.argv[5] res_filename = sys.argv[6] if not os.path.isfile(res_filename): utils.error_exit( "{} does not exist, or is not a file\n".format(res_filename)) pvalue_mode = sys.argv[4].upper() if pvalue_mode != "C" and pvalue_mode != "E" and pvalue_mode != "W": utils.error_exit( "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format( pvalue_mode)) try: use_additional_knowledge = int(sys.argv[1]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[1])) try: delta = float(sys.argv[2]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[2])) try: min_freq = float(sys.argv[3]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[3])) ds_stats = getDatasetInfo.get_ds_stats(dataset) (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta, pvalue_mode, use_additional_knowledge) utils.print_itemsets(trueFIs, ds_stats['size']) sys.stderr.write( ",".join( ("res_file={}".format(os.path.basename(res_filename)), "use_add_knowl={}".format(use_additional_knowledge), "pvalue_mode={}".format(pvalue_mode), "d={}".format(delta), "min_freq={}".format(min_freq), "trueFIs={}\n".format(len(trueFIs))))) sys.stderr.write( ",".join( ("union_bound_factor={}".format(stats['union_bound_factor']), "critical_value={}".format(stats['critical_value']), "removed={}".format(stats['removed']), "epsilon={}\n".format(stats['epsilon'])))) sys.stderr.write( ",".join( ("res_file,add_knowl,pvalue_mode,delta,min_freq,trueFIs", "union_bound_factor,critical_value,removed,epsilon\n"))) sys.stderr.write("{}\n".format( ",".join( (str(i) for i in (os.path.basename(res_filename), use_additional_knowledge, pvalue_mode, delta, min_freq, len(trueFIs), stats['union_bound_factor'], stats['critical_value'], stats['removed'], stats['epsilon'])))))
def main(environment, game, level, player_img, use_graph, draw_all_labels, draw_dup_labels, draw_path, show_score, process, gen_prolog, dimensions, structure, summary, runtime, prolog): # Set environment variable if environment not in ENVIRONMENTS: utils.error_exit( "invalid environment - environment must be one of %s" % str(ENVIRONMENTS)) if environment == 'maze': os.environ['MAZE'] = "1" if dimensions or structure or summary: if dimensions: print(Level.get_level_dimensions_in_tiles(game, level)) if structure: Level.print_structural_txt(game, level) if summary: Level.print_tile_summary(game, level) Level.print_start_goal_tile_locations(game, level) print("Num gaps: %d" % Level.get_num_gaps(game, level)) exit(0) if runtime: import json all_levels_process_info_file = utils.get_filepath( "", "all_levels_process_info.pickle") if not os.path.exists(all_levels_process_info_file): utils.error_exit("%s file not found" % all_levels_process_info_file) all_levels_process_info = utils.read_pickle( all_levels_process_info_file) cur_game_level = "%s/%s" % (game, level) for process_key, process_runtimes in all_levels_process_info.items(): if process_key == cur_game_level: print("----- Process Script Runtimes -----") print("Game: %s" % game) print("Level: %s" % level) print(json.dumps(process_runtimes, indent=2)) exit(0) utils.error_exit("Run 'pypy3 main.py <environment> %s %s --process'" % (game, level)) if prolog: import json all_prolog_info_file = utils.get_filepath( "level_saved_files_block/prolog_files", "all_prolog_info.pickle") if not os.path.exists(all_prolog_info_file): utils.error_exit("%s file not found" % all_prolog_info_file) all_prolog_info = utils.read_pickle(all_prolog_info_file) prolog_exists = all_prolog_info.get(level) if prolog_exists: print("----- Prolog Info -----") print("Game: %s" % game) print("Level: %s" % level) for key, item in prolog_exists.items(): print("%s: %s" % (key, str(item))) exit(0) utils.error_exit( "Run 'python main.py <environment> %s %s --gen_prolog'" % (game, level)) if process: print("----- Creating Uniform Txt Layer File -----") Level.get_uniform_tile_chars(game, level) print("---- Processing Level -----") print("Game: %s" % game) print("Level: %s" % level) process_runtimes = [] import enumerate state_graph_file, runtime = enumerate.main(game, level, player_img) process_runtimes.append(('enumerate', runtime)) import extract_metatiles unique_metatiles_file, metatile_coords_dict_file, runtime = extract_metatiles.main( save_filename=level, player_img=player_img, print_stats=False, state_graph_files=[state_graph_file]) process_runtimes.append(('extract_metatiles', runtime)) import get_metatile_id_map id_metatile_map_file, metatile_id_map_file, runtime = get_metatile_id_map.main( save_filename=level, unique_metatiles_file=unique_metatiles_file, player_img=player_img) process_runtimes.append(('get_metatile_id_map', runtime)) import get_tile_id_coords_map tile_id_extra_info_coords_map_file, runtime = get_tile_id_coords_map.main( game, level, metatile_coords_dict_file, metatile_id_map_file, player_img) process_runtimes.append(('get_tile_id_coords_map', runtime)) import get_states_per_metatile runtime = get_states_per_metatile.main( save_filename=level, unique_metatiles_file=unique_metatiles_file, player_img=player_img, print_stats=False) process_runtimes.append(('get_states_per_metatile', runtime)) import extract_constraints metatile_constraints_file, runtime = extract_constraints.main( save_filename=level, metatile_id_map_file=metatile_id_map_file, 
id_metatile_map_file=id_metatile_map_file, metatile_coords_dict_files=[metatile_coords_dict_file], player_img=player_img) process_runtimes.append(('extract_constraints', runtime)) save_process_runtimes(process_key="%s/%s" % (game, level), process_runtimes=process_runtimes) if gen_prolog: import gen_prolog metatile_constraints_file = "level_saved_files_%s/metatile_constraints/%s.pickle" % ( player_img, level) if not os.path.exists(metatile_constraints_file): utils.error_exit( "%s file does not exist. Run 'pypy3 main.py %s %s %s --process' first" % (metatile_constraints_file, environment, game, level)) prolog_file, runtime = gen_prolog.main( tile_constraints_file=metatile_constraints_file, debug=False, print_pl=False, save=True) save_process_runtimes(process_key="%s/%s" % (game, level), process_runtimes=[('gen_prolog', runtime)]) if not (process or gen_prolog): import platformer platformer.main(game, level, player_img, use_graph, draw_all_labels, draw_dup_labels, draw_path, show_score)
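# Example invocations (sketch): how the main() above is typically driven from
# the command line, based on the usage strings the script itself prints. The
# environment, game, and level names ('platformer', 'super_mario_bros',
# 'mario-1-1') are placeholders, not names taken from this code.
#
#   pypy3 main.py platformer super_mario_bros mario-1-1 --process
#   python main.py platformer super_mario_bros mario-1-1 --gen_prolog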
def generate_video(self, audio, image): ''' Encodes a video file from our audio and image input files. ''' # Check to see if our files exist at all. if not (os.path.exists(audio) and os.path.exists(image)): error_exit('please specify a valid audio and image file') in_image_ext = os.path.splitext(image)[1] in_audio_ext = os.path.splitext(audio)[1] # Check our MP3/OGG/FLAC/etc file and get its duration. probe_cmd = [self.settings['path_ffprobe'], audio] try: probe_out = subprocess.check_output(probe_cmd, stderr=subprocess.STDOUT) if self.settings['verbose']: print(probe_out) except: error_exit('''couldn't probe the audio file \ (ffprobe might not be available)''') # Try to extract some metadata from the file using Mutagen. try: metadata = mutagen.File(audio) except AttributeError: metadata = [] # Save a human-readable version of the metadata in the object. # Keep the original Mutagen output around too. self.settings['metadata'] = {} self.settings['orig_metadata'] = metadata if metadata is not None: for tag in metadata: item = metadata[tag] # We join the item in case it's still a list, as in the case # of Vorbis. if isinstance(item, (list, tuple)): item = ''.join(item) self.settings['metadata'][self.tunetags.tag_lookup(tag)] = \ str(item) # Lift the actual track duration string out of the output. duration = re.findall('Duration: (.+?),', probe_out) # If we get valid output, parse the duration and get a seconds value. # Otherwise, stop the script. if len(duration): duration = duration[0] else: error_exit('''couldn't parse ffprobe's output. Try again with \ -v (--verbose) to see what went wrong.''') # Turn the string into a datetime format. try: audio_info = datetime.strptime(duration, '%H:%M:%S.%f') delta = timedelta(hours=audio_info.hour, minutes=audio_info.minute, seconds=audio_info.second, microseconds=audio_info.microsecond) except ValueError: error_exit('''encountered an error trying to determine the \ duration of the audio file. It could be in an unrecognized format, or \ longer than 24 hours. (Duration: %s, exception: %s)''' % (duration, sys.exc_info()[0])) print('Using image file `%s\', size: %s.' % (image, os.path.getsize(image))) print('Using audio file `%s\', size: %s, duration: %s.' % (audio, os.path.getsize(audio), duration)) if self.settings['metadata'] == []: print("Couldn't extract audio file tags. Continuing.") else: print('Extracted %d tag(s) from the audio file.' % len(self.settings['metadata'])) print('Encoding video file...') # Now call ffmpeg and produce the video. ffmpeg_cmd = [ self.settings['path_ffmpeg'], # loop the video (picture) for the movie's duration '-loop', '1', # a framerate of 1fps (anything lower won't be accepted by Youtube) '-framerate', '1:1', # one input file is the picture '-i', image, # automatically overwrite on duplicate '-y', ] # Add the audio file. if in_audio_ext == '.flac': # mp4 doesn't take flac very well, so we'll convert it. ffmpeg_cmd.extend([ # one input file is the audio '-i', audio, # for compatibility with various builds, we'll use MP3 '-c:a', 'libmp3lame', # high quality CBR is good enough '-b:a', '320k', ]) else: ffmpeg_cmd.extend([ # one input file is the audio '-i', audio, # only copy the audio, don't re-encode it '-c:a', 'copy', ]) # Add the video encoding options. 
ffmpeg_cmd.extend([ # use x264 as the video encoder '-c:v', 'libx264', # duration of the video '-t', str(delta.total_seconds()), # 4:4:4 chroma subsampling (best quality) '-pix_fmt', 'yuv444p', # as fast as possible, at cost of filesize # (uploading likely costs less time) '-preset', 'ultrafast', # lossless quality '-qp', '0', # output self.settings['path_output'] ]) try: probe_out = subprocess.check_output(ffmpeg_cmd, stderr=subprocess.STDOUT) if self.settings['verbose']: print(probe_out) except: error_exit('''encountered an error trying to generate the video. \ Try again with -v (--verbose) to see what went wrong. \ (Exception: %s)''' % sys.exc_info()[0]) print('Successfully generated the file `%s\'.' % self.settings['path_output'])
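# For reference (sketch): with a non-FLAC audio file, the ffmpeg_cmd list built
# above corresponds to a command line roughly like the following. 'cover.png',
# 'song.mp3' and the 215.0-second duration are placeholder values.
#
#   ffmpeg -loop 1 -framerate 1:1 -i cover.png -y -i song.mp3 \
#          -c:a copy -c:v libx264 -t 215.0 -pix_fmt yuv444p \
#          -preset ultrafast -qp 0 tmp.mp4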
def get_trueFIs(exp_res_filename, eval_res_filename, min_freq, delta, pvalue_mode, do_filter=0): """ Compute the True Frequent Itemsets using the holdout method. The holdout method is described in Geoffrey I. Webb, "Discovering significant patterns" in Machine Learning, Vol. 68, Issue 1, pp. 1-33, 2007. The dataset is split in two parts, an exploratory part and an evaluation part. Each part is mined separately at frequency 'min_freq'. The results are contained in 'exp_res_filename' and 'eval_res_filename' respectively. The parameter 'do_filter' controls a variant of the algorithm where the results from the exploratory part are filtered further. The p-values for the Binomial tests are computed using the mode specified by pvalue_mode: 'c' for Chernoff, 'e' for exact, or 'w' for weak Chernoff. Returns a pair (trueFIs, stats). 'trueFIs' is a dict whose keys are itemsets (frozensets) and values are frequencies. This collection of itemsets contains only TFIs with probability at least 1 - delta. 'stats' is a dict containing various statistics used in computing the collection of itemsets.""" stats = dict() with open(exp_res_filename) as FILE: size_line = FILE.readline() try: size_str = size_line.split("(")[1].split(")")[0] except IndexError: utils.error_exit( " ".join( ("Cannot compute size of the explore dataset:", "'{}' is not in a recognized format\n".format( size_line)))) try: stats['exp_size'] = int(size_str) except ValueError: utils.error_exit( " ".join( ("Cannot compute size of the explore dataset:", "'{}' is not a number\n".format(size_str)))) with open(eval_res_filename) as FILE: size_line = FILE.readline() try: size_str = size_line.split("(")[1].split(")")[0] except IndexError: utils.error_exit( " ".join( ("Cannot compute size of the eval dataset:", "'{}' is not in a recognized format\n".format( size_line)))) try: stats['eval_size'] = int(size_str) except ValueError: utils.error_exit( " ".join( ("Cannot compute size of the eval dataset:", "'{}' is not a number\n".format(size_str)))) stats['orig_size'] = stats['exp_size'] + stats['eval_size'] exp_res = utils.create_results(exp_res_filename, min_freq) stats['exp_res'] = len(exp_res) trueFIs = dict() supposed_freq = (math.ceil( stats['orig_size'] * min_freq) - 1) / stats['orig_size'] stats['filter_critical_value'] = 0 if do_filter > 0: stats['lowered_delta'] = 1 - math.sqrt(1 - delta) exp_res_filtered = dict() stats['filter_critical_value'] = math.log(stats['lowered_delta']) - do_filter last_accepted_freq = 1.0 last_non_accepted_freq = 0.0 for itemset in exp_res: if utils.pvalue(pvalue_mode, exp_res[itemset], stats['exp_size'], supposed_freq) <= stats['filter_critical_value']: trueFIs[itemset] = exp_res[itemset] if exp_res[itemset] < last_accepted_freq: last_accepted_freq = exp_res[itemset] else: exp_res_filtered[itemset] = exp_res[itemset] if exp_res[itemset] > last_non_accepted_freq: last_non_accepted_freq = exp_res[itemset] # Compute epsilon for the binomial min_diff = 5e-6 # controls when to stop the binary search while last_accepted_freq - last_non_accepted_freq > min_diff: mid_point = (last_accepted_freq - last_non_accepted_freq) / 2 test_freq = last_non_accepted_freq + mid_point p_value = utils.pvalue(pvalue_mode, test_freq, stats['eval_size'], supposed_freq) if p_value <= stats['filter_critical_value']: last_accepted_freq = test_freq else: last_non_accepted_freq = test_freq stats['filter_epsilon'] =
last_non_accepted_freq + ((last_accepted_freq - last_non_accepted_freq) / 2) - min_freq else: stats['lowered_delta'] = delta exp_res_filtered = exp_res stats['filter_epsilon'] = 1.0 exp_res_filtered_set = set(exp_res_filtered.keys()) stats['exp_res_filtered'] = len(exp_res_filtered_set) stats['tfis_from_exp'] = len(trueFIs) sys.stderr.write("do_filter: {}, tfis_from_exp: {}, exp_res_filtered: {}\n".format(do_filter, stats['tfis_from_exp'], stats['exp_res_filtered'])) if stats['exp_res_filtered'] > 0: eval_res = utils.create_results(eval_res_filename, min_freq) eval_res_set = set(eval_res.keys()) stats['eval_res'] = len(eval_res) intersection = exp_res_filtered_set & eval_res_set stats['holdout_intersection'] = len(intersection) stats['holdout_false_negatives'] = len(exp_res_filtered_set - eval_res_set) # Bonferroni correction (Union bound). We work in the log space. stats['critical_value'] = math.log(stats['lowered_delta']) - math.log(stats['exp_res_filtered']) # Add TFIs from eval last_accepted_freq = 1.0 last_non_accepted_freq = min_freq for itemset in sorted(intersection, key=lambda x : eval_res[x], reverse=True): p_value = utils.pvalue(pvalue_mode, eval_res[itemset], stats['eval_size'], supposed_freq) if p_value <= stats['critical_value']: trueFIs[itemset] = eval_res[itemset] last_accepted_freq = eval_res[itemset] else: last_non_accepted_freq = eval_res[itemset] break # Compute epsilon for the binomial min_diff = 5e-6 # controls when to stop the binary search while last_accepted_freq - last_non_accepted_freq > min_diff: mid_point = (last_accepted_freq - last_non_accepted_freq) / 2 test_freq = last_non_accepted_freq + mid_point p_value = utils.pvalue(pvalue_mode, test_freq, stats['eval_size'], supposed_freq) if p_value <= stats['critical_value']: last_accepted_freq = test_freq else: last_non_accepted_freq = test_freq stats['epsilon'] = last_non_accepted_freq + ((last_accepted_freq - last_non_accepted_freq) / 2) - min_freq stats['removed'] = len(intersection) - len(trueFIs) else: # stats['exp_res_filtered'] == 0 stats['eval_res'] = 0 stats['holdout_false_negatives'] = 0 stats['holdout_intersection'] = 0 stats['critical_value'] = 0 stats['epsilon'] = 0 stats['removed'] = 0 return (trueFIs, stats)
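# Worked sketch of the bisection used above to turn the last accepted and last
# non-accepted frequencies into 'epsilon'. The starting frequencies, the
# min_freq value, and the 0.0457 acceptance threshold are illustrative only;
# the lambda stands in for the p-value test against the critical value.
last_accepted_freq = 0.050
last_non_accepted_freq = 0.040
min_freq = 0.030
min_diff = 5e-6
accepted = lambda f: f >= 0.0457   # stand-in for: pvalue(f) <= critical_value
while last_accepted_freq - last_non_accepted_freq > min_diff:
    test_freq = last_non_accepted_freq + (last_accepted_freq - last_non_accepted_freq) / 2
    if accepted(test_freq):
        last_accepted_freq = test_freq
    else:
        last_non_accepted_freq = test_freq
epsilon = last_non_accepted_freq + (last_accepted_freq - last_non_accepted_freq) / 2 - min_freq
print(epsilon)   # ~0.0157: the bisection converges to the acceptance threshold minus min_freq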
def main(): # Verify arguments if len(sys.argv) != 8: utils.error_exit(" ".join( ("Usage: {}".format(os.path.basename(sys.argv[0])), "vcdim first_epsilon delta min_freq gap exploreres", "evalres\n"))) exp_res_filename = sys.argv[6] if not os.path.isfile(exp_res_filename): utils.error_exit( "{} does not exist, or is not a file\n".format(exp_res_filename)) eval_res_filename = sys.argv[7] if not os.path.isfile(eval_res_filename): utils.error_exit( "{} does not exist, or is not a file\n".format(eval_res_filename)) try: vcdim = int(sys.argv[1]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[1])) try: first_epsilon = float(sys.argv[2]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[2])) try: delta = float(sys.argv[3]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[3])) try: min_freq = float(sys.argv[4]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[4])) try: gap = float(sys.argv[5]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[5])) (trueFIs, stats) = get_trueFIs(exp_res_filename, eval_res_filename, min_freq, delta, gap, first_epsilon, vcdim) utils.print_itemsets(trueFIs, stats['orig_size']) sys.stderr.write(",".join( ("exp_res_file={}".format(os.path.basename(exp_res_filename)), "eval_res_file={}".format(os.path.basename(eval_res_filename)), "d={}".format(delta), "min_freq={}".format(min_freq), "trueFIs={}\n".format(len(trueFIs))))) sys.stderr.write("orig_size={},exp_size={},eval_size={}\n".format( stats['orig_size'], stats['exp_size'], stats['eval_size'])) sys.stderr.write("exp_res={},exp_res_filtered={},eval_res={}\n".format( stats['exp_res'], stats['exp_res_filtered'], stats['eval_res'])) sys.stderr.write(",".join( ("holdout_intersection={}".format(stats['holdout_intersection']), "holdout_false_positives={}".format(stats['holdout_false_positives']), "holdout_false_negatives={}".format(stats['holdout_false_negatives']), "holdout_jaccard={}\n".format(stats['holdout_jaccard'])))) sys.stderr.write("e1={},e2={},vcdim={}\n".format(stats['epsilon_1'], stats['epsilon_2'], stats['vcdim'])) sys.stderr.write(",".join( ("exp_res_file,eval_res_file,delta,min_freq,trueFIs", "orig_size,exp_size,eval_size,exp_res,eval_res", "holdout_intersection,holdout_false_positives", "holdout_false_negatives,holdout_jaccard,e1,e2,vcdim\n"))) sys.stderr.write("{}\n".format(",".join( (str(i) for i in (os.path.basename(exp_res_filename), os.path.basename(eval_res_filename), delta, min_freq, len(trueFIs), stats['orig_size'], stats['exp_size'], stats['eval_size'], stats['exp_res'], stats['eval_res'], stats['holdout_intersection'], stats['holdout_false_positives'], stats['holdout_false_negatives'], stats['holdout_jaccard'], stats['epsilon_1'], stats['epsilon_2'], stats['vcdim'])))))
import argparse from utils import read_config, error_exit import local # Beam flow with local batch processing import gcloud # Beam flow with stream processing using GC features # -------------------------------------------------------------------- # Testing Beam... if __name__ == '__main__': parser = argparse.ArgumentParser( description='Running the selected demo beam pipeline') parser.add_argument('--cfgfile', default='config.json', help='Config file name with path') parser.add_argument('--pipeline', help='Pipeline to run: local | gcp') args = parser.parse_args() cfg = read_config(args.cfgfile) if args.pipeline == 'local': local.run_pipeline(cfg) elif args.pipeline == 'gcp': gcloud.run_pipeline(cfg) else: error_exit( 'Invalid option for argument --pipeline.\nValid options: local | gcp' ) print('\nNormal program termination.\n')
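# Example usage (sketch): run the local or the Google Cloud pipeline with an
# explicit config file. 'beam_demo.py' and 'my_config.json' are placeholder
# names; only the --cfgfile and --pipeline options come from the parser above.
#
#   python beam_demo.py --cfgfile my_config.json --pipeline local
#   python beam_demo.py --cfgfile my_config.json --pipeline gcp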
save_hr_chunks = False debug = True # Set display dimensions to training level dimensions if chunk_width is None or chunk_height is None: # no alt chunk size specified display_w = None display_h = None all_levels_info_file = "../platformer/level_saved_files_block/all_levels_info.json" levels = read_json(all_levels_info_file).get('contents') for level_info in levels: if level_info.get('level_name') == training_level: display_w = level_info.get('level_width') display_h = level_info.get('level_height') break if display_w is None or display_h is None: error_exit("%s level not found in all_levels_info.json" % training_level) chunk_width = int(display_w / TILE_DIM) chunk_height = int(display_h / TILE_DIM) else: display_w = chunk_width * TILE_DIM display_h = chunk_height * TILE_DIM tileset = "tilesets/platformer/%s.json" % training_level scale_w = 1 scale_h = 1 command_str = "python %s.py --tileset %s --display_width %d --display_height %d " \ "--scale_width %d --scale_height %d --chunk_width %d --chunk_height %d --failureMillis %d" % \ (program, tileset, display_w, display_h, scale_w, scale_h, chunk_width, chunk_height,
def main(trial, levels, num_sol, asp, state_graph): if not (asp or state_graph): utils.error_exit( "Must specify at least one validation test to run: --asp or --state_graph" ) # Get file formats config_formats = TRIAL_CONFIG_FORMATS.get(trial) if config_formats is None: utils.error_exit("--trial must be one of %s" % str(list(TRIAL_CONFIG_FORMATS.keys()))) prolog_file_format = "level_saved_files_block/prolog_files/%s.pl" model_str_file_format = "level_saved_files_block/generated_level_model_strs/%s.txt" assignments_dict_file_format = "level_saved_files_block/generated_level_assignments_dicts/%s.pickle" # Initialize validation counts asp_checked_count = 0 asp_valid_count = 0 state_graph_checked_count = 0 state_graph_valid_count = 0 # Validate each solver run for level in levels: for config_file_format in config_formats: for sol in range(num_sol): prolog_file = prolog_file_format % level prolog_filename = utils.get_basepath_filename( prolog_file, 'pl') config_file = config_file_format % level config_filename = utils.get_basepath_filename( config_file, 'json') answer_set_filename = '_'.join( [prolog_filename, config_filename, 'a%d' % sol]) if asp: # Determine ASP checks to perform based on config file contents config_file_contents = utils.read_json(config_file) config = config_file_contents['config'] require_all_platforms_reachable = True require_all_bonus_tiles_reachable = True if config.get( 'require_all_platforms_reachable') is not None: require_all_platforms_reachable = eval( config['require_all_platforms_reachable']) if config.get( 'require_all_bonus_tiles_reachable') is not None: require_all_bonus_tiles_reachable = eval( config['require_all_bonus_tiles_reachable']) prolog_file_info = get_prolog_file_info(prolog_file) tile_ids = get_tile_ids_dictionary(prolog_file_info) model_str_file = model_str_file_format % answer_set_filename if os.path.exists(model_str_file): model_str = utils.read_txt(model_str_file) asp_valid = Solver.asp_is_valid( check_path=True, check_onground=require_all_platforms_reachable, check_bonus=require_all_bonus_tiles_reachable, model_str=model_str, player_img='block', answer_set_filename=answer_set_filename, tile_ids=tile_ids, save=False) status = "ASP VALID" if asp_valid else "ASP INVALID" print("%s: %s" % (answer_set_filename, status)) asp_checked_count += 1 asp_valid_count += 1 if asp_valid else 0 if state_graph: assignments_dict_file = assignments_dict_file_format % answer_set_filename if os.path.exists(assignments_dict_file): assignments_dict = utils.read_pickle( assignments_dict_file) valid_path = Solver.get_state_graph_valid_path( assignments_dict=assignments_dict, player_img='block', prolog_filename=prolog_filename, answer_set_filename=answer_set_filename, save=True) status = "GRAPH VALID" if valid_path else "GRAPH INVALID" print("%s: %s" % (answer_set_filename, status)) state_graph_checked_count += 1 state_graph_valid_count += 1 if valid_path is not None else 0 # Print validation results summary if asp: print("ASPs Checked: %d" % asp_checked_count) print("ASPs Valid: %d" % asp_valid_count) if state_graph: print("State Graphs Checked: %d" % state_graph_checked_count) print("State Graphs Valid: %d" % state_graph_valid_count)
def main(game, level, player_img, use_graph, draw_all_labels, draw_dup_labels, draw_path, show_score): # Create the Level level_obj = Level.generate_level_from_file(game, level) # Level saved files state_graph_file = "level_saved_files_%s/enumerated_state_graphs/%s/%s.gpickle" % (player_img, game, level) if game == "generated" and os.path.exists("level_saved_files_%s/generated_level_paths/%s.pickle" % (player_img, level)): generated_level_path_coords = read_pickle("level_saved_files_%s/generated_level_paths/%s.pickle" % (player_img, level)) else: generated_level_path_coords = None if use_graph and os.path.exists(state_graph_file): print("***** USING ENUMERATED STATE GRAPH *****") state_graph = nx.read_gpickle(state_graph_file) else: print("***** USING MANUAL CONTROLS *****") state_graph = None edge_actions_dict = None if state_graph is None else nx.get_edge_attributes(state_graph, 'action') # Background FPS = 40 # frame rate ANI = 4 # animation cycles WORLD_X = min(level_obj.width, MAX_WIDTH) WORLD_Y = min(level_obj.height, MAX_HEIGHT) clock = pygame.time.Clock() pygame.init() world = pygame.display.set_mode([WORLD_X, WORLD_Y]) BACKGROUND_COLOR = COLORS.get('DARK_GRAY') # Player player_model = Player(player_img, level_obj) player_view = PlayerView(player_img) player_list = pygame.sprite.Group() player_list.add(player_view) # Level platform_sprites = get_sprites(level_obj.get_platform_coords(), 'block_tile.png') goal_sprites = get_sprites(level_obj.get_goal_coords(), 'goal_tile.png') bonus_sprites = get_sprites(level_obj.get_bonus_coords(), 'bonus_tile.png') one_way_platform_sprites = get_sprites(level_obj.get_one_way_platform_coords(), 'one_way_block_tile.png') hazard_sprites = get_sprites(level_obj.get_hazard_coords(), 'hazard_tile.png') wall_sprites = get_sprites(level_obj.get_wall_coords(), 'block_tile.png') collected_bonus_tile_coords_dict = {} # Camera camera = Camera(Camera.camera_function, level_obj.width, level_obj.height, WORLD_X, WORLD_Y) # Setup drawing metatile labels if draw_all_labels or draw_dup_labels: metatile_labels, font_color, label_padding = \ setup_metatile_labels(game, level, player_img, draw_all_labels, draw_dup_labels) # Setup drawing solution path if draw_path: path_font_color = COLORS.get('GREEN') start_font_color = COLORS.get('BLUE') goal_font_color = COLORS.get('RED') if generated_level_path_coords is not None: path_coords = generated_level_path_coords start_coord = generated_level_path_coords[0] goal_coord = generated_level_path_coords[-1] elif os.path.exists(state_graph_file): graph = nx.read_gpickle(state_graph_file) shortest_path_dict = shortest_path_xy(graph) path_coords = shortest_path_dict.get("path_coords") start_coord = shortest_path_dict.get("start_coord") goal_coord = shortest_path_dict.get("goal_coord") else: error_exit("No enumerated state graph available to draw solution path") # Input handling input_handler = Inputs() # Main Loop main = True while main: input_handler.onLoop() for event in pygame.event.get(): if event.type == pygame.QUIT: pygame.quit() main = False if event.type == pygame.KEYDOWN: if event.key == ord('q'): pygame.quit() main = False sys.exit() elif event.key == ord('r'): player_model.reset() collected_bonus_tile_coords_dict = {} platform_sprites = get_sprites(level_obj.get_platform_coords(), 'block_tile.png') input_handler.onEvent(event) if not main: break world.fill(BACKGROUND_COLOR) camera.update(player_view) # set camera to track player # Update Player model and view player_model.update(action=input_handler.getAction(), 
precomputed_graph=state_graph, edge_actions_dict=edge_actions_dict) player_view.update(player_model.state.x, player_model.state.y, player_model.half_player_w, player_model.half_player_h) # Update the current score hit_bonus_coord = player_model.get_hit_bonus_coord() if hit_bonus_coord != '': hit_bonus_coord_x = player_model.state.x // TILE_DIM hit_bonus_coord_y = player_model.state.y // TILE_DIM - 1 if hit_bonus_coord == 'N': pass elif hit_bonus_coord == 'NE': hit_bonus_coord_x += 1 elif hit_bonus_coord == 'NW': hit_bonus_coord_x -= 1 else: error_exit("unrecognized hit bonus coord") hit_bonus_coord_xy = (hit_bonus_coord_x * TILE_DIM, hit_bonus_coord_y * TILE_DIM) if hit_bonus_coord_xy not in level_obj.get_bonus_coords(): error_exit("hit bonus tile that is not there: " + str(hit_bonus_coord_xy)) if collected_bonus_tile_coords_dict.get(hit_bonus_coord_xy) is None: collected_bonus_tile_coords_dict[hit_bonus_coord_xy] = 1 platform_sprites.add(Tile(hit_bonus_coord_xy[0], hit_bonus_coord_xy[1], 'block_tile.png')) score = len(collected_bonus_tile_coords_dict) * 10 # Draw sprites entities_to_draw = [] entities_to_draw += list(bonus_sprites) # draw bonus tiles entities_to_draw += list(platform_sprites) # draw platform tiles entities_to_draw += list(one_way_platform_sprites) # draw one-way platform tiles entities_to_draw += list(hazard_sprites) entities_to_draw += list(wall_sprites) entities_to_draw += list(player_list) # draw player entities_to_draw += list(goal_sprites) # draw goal tiles for e in entities_to_draw: world.blit(e.image, camera.apply(e)) # Draw metatile labels if draw_all_labels or draw_dup_labels: for coord in level_obj.get_all_possible_coords(): # draw metatile border outlines tile_rect = pygame.Rect(coord[0], coord[1], TILE_DIM, TILE_DIM) tile_rect = camera.apply_to_rect(tile_rect) # adjust based on camera pygame.draw.rect(world, font_color, tile_rect, 1) for label in metatile_labels: # draw metatile labels surface, label_x, label_y = label label_x, label_y = camera.apply_to_coord((label_x, label_y)) world.blit(surface, (label_x + label_padding[0], label_y + label_padding[1])) # Draw level solution path if draw_path: for coord in path_coords: if coord == start_coord: color = start_font_color elif coord == goal_coord: color = goal_font_color else: color = path_font_color coord = eval(coord) path_component = pygame.Rect(coord[0], coord[1], 2, 2) path_component = camera.apply_to_rect(path_component) pygame.draw.rect(world, color, path_component, 1) # Draw text labels label_rect_pairs = [] if player_model.goal_reached(): score += 50 labels = [ ("You Win!", 50, COLORS.get('GREEN')), ("Score: %d" % score, 30, COLORS.get('YELLOW')), ("Press 'R' to replay or 'Q' to quit", 30, COLORS.get('YELLOW')) ] label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X/2, labels=labels) elif player_model.is_dead(): labels = [ ("Game Over", 50, COLORS.get('RED')), ("Score: %d" % score, 30, COLORS.get('YELLOW')), ("Press 'R' to replay or 'Q' to quit", 30, COLORS.get('YELLOW')) ] label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X / 2, labels=labels) elif show_score: labels = [("Score: %d" % score, 50, COLORS.get('YELLOW'))] label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X / 2, labels=labels) for label, label_rect in label_rect_pairs: world.blit(label, label_rect) pygame.display.flip() clock.tick(FPS)
def main(): if len(sys.argv) != 7: utils.error_exit(" ".join( ("USAGE: {}".format(os.path.basename(sys.argv[0])), "use_additional_knowledge={{0|1}} delta min_freq gap dataset", "results_filename\n"))) dataset = sys.argv[5] res_filename = os.path.expanduser(sys.argv[6]) if not os.path.isfile(res_filename): utils.error_exit( "{} does not exist, or is not a file\n".format(res_filename)) try: use_additional_knowledge = int(sys.argv[1]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[1])) try: delta = float(sys.argv[2]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[2])) try: min_freq = float(sys.argv[3]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[3])) try: gap = float(sys.argv[4]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[4])) ds_stats = getDatasetInfo.get_ds_stats(dataset) (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta, gap, use_additional_knowledge) utils.print_itemsets(trueFIs, ds_stats['size']) sys.stderr.write(",".join( ("res_file={}".format(os.path.basename(res_filename)), "use_add_knowl={}".format(use_additional_knowledge), "e1={},e2={}".format(stats['epsilon_1'], stats['epsilon_2']), "d={}".format(delta), "min_freq={},trueFIs={}\n".format(min_freq, len(trueFIs))))) sys.stderr.write(",".join( ("base_set={}".format(stats['base_set']), "maximal_itemsets={}".format(stats['maximal_itemsets']), "negbor={}".format(stats['negative_border']), "emp_vc_dim={}".format(stats['emp_vc_dim']), "not_emp_vc_dim={}\n".format(stats['not_emp_vc_dim'])))) sys.stderr.write(",".join( ("res_file,add_knowl,e1,e2,delta,min_freq,trueFIs", "base_set,maximal_itemsets,negative_border,emp_vc_dim", "not_emp_vc_dim\n"))) sys.stderr.write("{}\n".format(",".join( (str(i) for i in (os.path.basename(res_filename), use_additional_knowledge, stats['epsilon_1'], stats['epsilon_2'], delta, min_freq, len(trueFIs), stats['base_set'], stats['maximal_itemsets'], stats['negative_border'], stats['emp_vc_dim'], stats['not_emp_vc_dim'])))))
def install_riemann(): langohr_source_url = ctx.node.properties['langohr_jar_source_url'] daemonize_source_url = ctx.node.properties['daemonize_rpm_source_url'] riemann_source_url = ctx.node.properties['riemann_rpm_source_url'] # Needed for Riemann's config cloudify_resources_url = ctx.node.properties['cloudify_resources_url'] rabbitmq_username = ctx.node.properties['rabbitmq_username'] rabbitmq_password = ctx.node.properties['rabbitmq_password'] riemann_config_path = '/etc/riemann' riemann_log_path = '/var/log/cloudify/riemann' langohr_home = '/opt/lib' extra_classpath = '{0}/langohr.jar'.format(langohr_home) # Confirm username and password have been supplied for broker before # continuing. # Components other than logstash and riemann have this handled in code. # Note that these are not directly used in this script, but are used by the # deployed resources, hence the check here. if not rabbitmq_username or not rabbitmq_password: utils.error_exit( 'Both rabbitmq_username and rabbitmq_password must be supplied ' 'and at least 1 character long in the manager blueprint inputs.') ctx.instance.runtime_properties['rabbitmq_endpoint_ip'] = \ utils.get_rabbitmq_endpoint_ip() ctx.logger.info('Installing Riemann...') utils.set_selinux_permissive() utils.copy_notice('riemann') utils.mkdir(riemann_log_path) utils.mkdir(langohr_home) utils.mkdir(riemann_config_path) utils.mkdir('{0}/conf.d'.format(riemann_config_path)) langohr = utils.download_cloudify_resource(langohr_source_url) utils.sudo(['cp', langohr, extra_classpath]) ctx.logger.info('Applying Langohr permissions...') utils.sudo(['chmod', '644', extra_classpath]) utils.yum_install(daemonize_source_url) utils.yum_install(riemann_source_url) utils.logrotate('riemann') ctx.logger.info('Downloading cloudify-manager Repository...') manager_repo = utils.download_cloudify_resource(cloudify_resources_url) ctx.logger.info('Extracting Manager Repository...') utils.untar(manager_repo, '/tmp') ctx.logger.info('Deploying Riemann manager.config...') utils.move( '/tmp/plugins/riemann-controller/riemann_controller/resources/manager.config', # NOQA '{0}/conf.d/manager.config'.format(riemann_config_path)) ctx.logger.info('Deploying Riemann conf...') utils.deploy_blueprint_resource('{0}/main.clj'.format(CONFIG_PATH), '{0}/main.clj'.format(riemann_config_path)) # our riemann configuration will (by default) try to read these environment # variables. If they don't exist, it will assume # that they're found at "localhost" # export MANAGEMENT_IP="" # export RABBITMQ_HOST="" # we inject the management_ip for both of these to Riemann's systemd # config. # These should be potentially different # if the manager and rabbitmq are running on different hosts. utils.systemd.configure('riemann') utils.clean_var_log_dir('riemann')
def build_processor(arguments): splitter = re.compile(DEFAULT_REGEXP_SPLITTER) group_by_list = arguments['--group-by'] arguments['--list-group-by'] = list() arguments['--list-group-by-type'] = list() for arg in group_by_list.split(','): group_by_match = splitter.match(arg) if group_by_match is None: error_exit('incorrect item group-by of fields data "%s"' % arg) group_by_element = group_by_match.groupdict() arguments['--list-group-by'].append(group_by_element['key']) arguments['--list-group-by-type'].append(group_by_element['value']) arguments['--group-by'] = ','.join(arguments['--list-group-by']) arguments['--group-by-type'] = ','.join(arguments['--list-group-by-type']) uni_count_match = splitter.match(arguments['--uni-count']) if uni_count_match is None: error_exit('incorrect item uni-count of fields data "%s"' % arguments['--uni-count']) uni_count_element = uni_count_match.groupdict() arguments['--uni-count'] = uni_count_element['key'] arguments['--uni-count-type'] = uni_count_element['value'] fields = arguments['<var>'] if arguments['print']: label = ', '.join(fields.keys()) + ':' selections = ', '.join(fields.keys()) query = 'select %s from log group by %s' % (selections, selections) report_queries = [(label, query)] elif arguments['top']: limit = int(arguments['--limit']) report_queries = [] for var in fields.keys(): label = 'top %s' % var query = 'select %s, count(1) as count from log group by %s order by count desc limit %d' % (var, var, limit) report_queries.append((label, query)) elif arguments['avg']: label = 'average %s' % fields.keys() selections = ', '.join('avg(%s)' % var for var in fields.keys()) query = 'select %s from log' % selections report_queries = [(label, query)] elif arguments['sum']: label = 'sum %s' % fields.keys() selections = ', '.join('sum(%s)' % var for var in fields.keys()) query = 'select %s from log' % selections report_queries = [(label, query)] elif arguments['query']: report_queries = arguments['<query>'] fields = arguments['<fields>'] else: report_queries = [(name, query % arguments) for name, query in DEFAULT_QUERIES] fields = dict(DEFAULT_FIELDS, **dict(zip(arguments['--list-group-by'], arguments['--list-group-by-type']))) fields[arguments['--uni-count']] = arguments['--uni-count-type'] for label, query in report_queries: logging.info('query for "%s":\n %s', label, query) auto_rotate = dict() limit_time = int(arguments['--auto-rotate']) auto_rotate['enabled'] = True if limit_time else False auto_rotate['interval'] = DEFAULT_LIMIT_TIME if limit_time < 0 or limit_time > 2592000 else limit_time auto_rotate['last_timestamp'] = '' if auto_rotate['enabled']: auto_rotate['get_last_ts_query'] = 'select max(time_local) from log' auto_rotate['delete_old_rows_query'] = 'delete from log where datetime(time_local) < ' \ 'datetime(:last_timestamp, "-" || :interval || " seconds")' logging.info('query for select last timestamp: %s', auto_rotate['get_last_ts_query']) logging.info('query for delete old rows: %s', auto_rotate['delete_old_rows_query']) if not arguments['--time-rpl-expr']: arguments['--time-rpl-expr'] = DEFAULT_REPLACE_EXPRESSION processor_fields = dict() if type(fields) is str: items = fields.split(',') for item in items: fields_match = splitter.match(item) if fields_match is None: error_exit('failed parsing of field data "%s"' % item) fields_element = fields_match.groupdict() processor_fields[fields_element['key']] = fields_element['value'] elif type(fields) is dict: processor_fields = fields else: error_exit('incorrect type of fields data "%s"' % str(type(fields))) processor = SQLProcessor(report_queries, processor_fields, auto_rotate) return processor
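build_processor relies on DEFAULT_REGEXP_SPLITTER producing the named groups 'key' and 'value' for items such as 'status:str'. The real pattern is defined elsewhere in the module; the sketch below assumes a plausible 'key:type' form purely to show how the groupdict() lookups above are meant to behave.

import re

# Assumed shape of DEFAULT_REGEXP_SPLITTER; the actual pattern lives elsewhere.
SPLITTER = re.compile(r'^(?P<key>\w+)(?::(?P<value>\w+))?$')

def parse_field(item):
    # Return the (key, value) pair for a single "key:type" item, or fail loudly
    # when the item does not match, mirroring the error_exit paths above.
    match = SPLITTER.match(item)
    if match is None:
        raise ValueError('failed parsing of field data "%s"' % item)
    parts = match.groupdict()
    return parts['key'], parts['value']

# Example: parse_field('status:str') -> ('status', 'str')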
def create_project(project_name, default, helloworld, api, spa): """ This function is responsible for interacting with the user during project creation. Args: project_name (string): This is the project name that will be used for project creation. default (bool): This flag says if project is going to have the default boilerplate structure. helloworld (bool): This flag says if project is going to have a minimal hello-world structure. api (bool): This flag says if project is going to have api-like boilerplate structure. spa (bool): This flag says if project is going to have spa-like boilerplate structure. Raises: FileExistsError: If project_name param has the same value as some of the directories in the current directory. """ # getting arguments and options from the locals() function options = locals() # project_name is removed since we only want to browse through the boilerplate options options.pop('project_name') # if none of the options was selected, fall back to default if [i for i in options.values()].count(True) == 0: options['default'] = True # checking if more than 1 option was selected elif [i for i in options.values()].count(True) > 1: error_exit("Please make sure only 1 option is selected and try again.") # checking if project_name matches any directory in the current directory try: create_folder(project_name) except FileExistsError: error_exit( 'That directory already exists. Please check your project name and try again.' ) # printing when project creation is starting click.echo(NEWLINE + 'Creating a new Flask app in ' + colored(f'~/{project_name}', 'green') + '.') click.echo(NEWLINE) # create venv if helloworld option is not selected if not helloworld: create_venv(f'./{project_name}/venv/') # deciding which boilerplate to choose and creating it based on argument choice base_dir = os.path.dirname(__file__) # iterating over names and values in options dictionary for name, value in options.items(): if value: choice = os.path.join(base_dir, name) # copy the boilerplate filetree to the project folder try: copy_filetree(choice, f"./{project_name}/") except Exception as e: error_exit(e) # output hell starts here click.echo(f'Success! Created app {project_name} in {os.getcwd()}' + f'/{project_name}') click.echo('Inside that directory you can run several commands:') click.echo(NEWLINE) # print commands and descriptions print_command('python run.py', 'Starts the server, default config is set to development.') if not helloworld: print_command('export secret_key=STRING', 'Sets the secret key for your app.') print_command( 'export PRODUCTION=True', 'Sets production config for your app. Setting it to False will set the development config.' ) print_command( 'source venv/bin/activate (unix) \n\t./venv/Scripts/activate (windows)', 'Activate the virtual environment for the app.') print_command( 'pip install -r requirements.txt', 'Install the packages listed in requirements.txt into the venv.') click.echo('We suggest that you start by typing:') click.echo(colored('\tcd ', 'cyan') + colored(project_name, 'white')) click.echo( colored( '\tsource venv/bin/activate' if not system() == 'Windows' else '\t./venv/Scripts/activate', 'cyan')) click.echo( colored('\tpip install -r ', 'cyan') + colored('requirements.txt', 'white')) click.echo(colored('\tpython run.py', 'cyan')) else: click.echo('We suggest that you start by typing:') click.echo(colored('\tcd ', 'cyan') + colored(project_name, 'white')) click.echo(colored('\tpip install flask ', 'cyan')) click.echo(colored('\tpython app.py', 'cyan')) click.echo(NEWLINE + 'Happy hacking!')
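create_project delegates the actual filesystem work to create_folder and copy_filetree, which are defined elsewhere in the package. The following is only a minimal sketch of what such helpers could look like, assuming plain os/shutil semantics; the signatures are inferred from the calls above and are not the project's actual implementation.

import os
import shutil

def create_folder(project_name):
    # Raises FileExistsError if the directory already exists, which
    # create_project catches and turns into error_exit.
    os.makedirs(project_name, exist_ok=False)

def copy_filetree(src, dst):
    # Copy every entry of the chosen boilerplate directory into the new
    # project folder, preserving the relative layout.
    for entry in os.listdir(src):
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if os.path.isdir(src_path):
            shutil.copytree(src_path, dst_path)
        else:
            shutil.copy2(src_path, dst_path)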
def get_solver_config(config, prolog_file_info): # ----- LEVEL DIMENSIONS ----- level_w = config['level_dimensions']['width'] level_h = config['level_dimensions']['height'] # ----- FORCE TILE TYPE (tiles at specified coords must be a certain type) ----- forced_tiles = {} if config.get('force_tile_type') is not None: for tile_type, coord_strs in config['force_tile_type'].items(): check_tile_type_exists_in_prolog( tile_type, prolog_file_info, 'cannot force tile type (%s: %s)' % (tile_type, coord_strs)) forced_tiles[tile_type] = eval(coord_strs) # ----- SOFT CONSTRAINTS ----- soft_constraints = { "num_tile_ranges": False, "perc_tile_ranges": False, "perc_level_ranges": False } if config.get('soft_constraints') is not None: for constraint_key, constraint_value in config.get( 'soft_constraints').items(): soft_constraints[constraint_key] = eval(constraint_value) # ----- SPECIFY NUM TILE RANGES (for a certain type) ----- num_tile_ranges = {} lo, hi = 0, level_w * level_h if config.get('num_tile_ranges') is not None: for tile_type, range_str in config['num_tile_ranges'].items(): check_tile_type_exists_in_prolog( tile_type, prolog_file_info, 'cannot force num tile range %s' % range_str) min_tiles, max_tiles = eval(range_str) num_tile_ranges[tile_type] = setup_tile_freq_range( tile_type, min_tiles, max_tiles, lo, hi) # Check if total min tiles > total tiles min_total = 0 for tile_type, tile_range in num_tile_ranges.items(): min_total += tile_range[0] if min_total > level_w * level_h: error_exit( "Sum of min tiles (%d) in specified num_tile_ranges cannot exceed the total number of tiles " "available in the generated level (%d)" % (min_total, level_w * level_h)) # ----- SPECIFY PERCENT TILE RANGES (for a certain type) ----- perc_tile_ranges = {} lo, hi = 0, 100 for tile_type in METATILE_TYPES: perc_tile_ranges[tile_type] = (lo, hi) if config.get('perc_tile_ranges') is not None: for tile_type, range_str in config['perc_tile_ranges'].items(): check_tile_type_exists_in_prolog( tile_type, prolog_file_info, 'cannot force perc tile range %s' % range_str) min_perc_tiles, max_perc_tiles = eval(range_str) perc_tile_ranges[tile_type] = setup_tile_freq_range( tile_type, min_perc_tiles, max_perc_tiles, lo, hi) # Check if total min perc tiles > 100% min_perc_total = 0 for tile_type, tile_range in perc_tile_ranges.items(): min_perc_total += tile_range[0] if min_perc_total > 100: error_exit( "Sum of min perc tiles (%d) in specified perc_tile_ranges cannot exceed 100%%" % min_perc_total) # ----- SPECIFY PERCENT TILE RANGES (from a certain level) ----- level_ids_map = prolog_file_info.get('level_ids_map') perc_level_ranges = {} lo, hi = 0, 100 for level, ids in level_ids_map.items(): perc_level_ranges[level] = (lo, hi) if config.get('perc_level_ranges') is not None: for level, range_str in config['perc_level_ranges'].items(): if level_ids_map.get(level) is None: error_exit( "The tileset does not contain tiles from level (%s) (specified in perc_level_" "ranges). 
Valid levels are: %s" % (level, str(list(level_ids_map.keys())))) min_perc_level, max_perc_level = eval(range_str) perc_level_ranges[level] = setup_tile_freq_range( level, min_perc_level, max_perc_level, lo, hi) # Check if total min perc levels > 100% min_perc_level_total = 0 for level, tile_range in perc_level_ranges.items(): min_perc_level_total += tile_range[0] if min_perc_level_total > 100: error_exit( "Sum of min perc tiles (%d) from each level specified in perc_level_ranges cannot exceed 100%%" % min_perc_level_total) # ----- SPECIFY START/GOAL POSITION RANGES ----- tile_position_ranges = { 'start_column': (0, level_w - 1), 'start_row': (0, level_h - 1), 'goal_column': (0, level_w - 1), 'goal_row': (0, level_h - 1) } if config.get('tile_position_ranges') is not None: for position, range_str in config['tile_position_ranges'].items(): if tile_position_ranges.get(position) is None: error_exit( "%s tile position does not exist. Position must be one of %s" % (position, str(list(tile_position_ranges.keys())))) level_max = level_w if 'column' in position else level_h min_index, max_index = eval(range_str) min_index, max_index = setup_tile_position_range( min_index, max_index, level_max) tile_position_ranges[position] = (min_index, max_index) # ----- SPECIFY IF START AND/OR GOAL TILE MUST BE ON GROUND ----- require_start_on_ground = False require_goal_on_ground = False if config.get('require_start_on_ground') is not None: require_start_on_ground = eval(config['require_start_on_ground']) if config.get('require_goal_on_ground') is not None: require_goal_on_ground = eval(config['require_goal_on_ground']) # ----- SPECIFY RANGE NUMBER OF GAPS (PITS) ALLOWED ----- lo, hi = 0, level_w num_gaps_range = (lo, hi) if config.get('num_gaps_range') is not None: min_gaps, max_gaps = eval(config['num_gaps_range']) min_gaps, max_gaps = setup_tile_freq_range('gap', min_gaps, max_gaps, lo, hi) num_gaps_range = (min_gaps, max_gaps) # ----- SPECIFY IF ALL PLATFORM OR BONUS TILES MUST BE REACHABLE ----- require_all_platforms_reachable = False require_all_bonus_tiles_reachable = False if config.get('require_all_platforms_reachable') is not None: require_all_platforms_reachable = eval( config['require_all_platforms_reachable']) if config.get('require_all_bonus_tiles_reachable') is not None: require_all_bonus_tiles_reachable = eval( config['require_all_bonus_tiles_reachable']) return { 'level_w': level_w, # int 'level_h': level_h, # int 'forced_tiles': forced_tiles, # {type: list-of-tile-coords}\ 'soft_constraints': soft_constraints, # {constraint_type: constraint_value} 'num_tile_ranges': num_tile_ranges, # { type: (min, max) } 'perc_tile_ranges': perc_tile_ranges, # { type: (min, max) } 'perc_level_ranges': perc_level_ranges, # { level: (min, max) } 'tile_position_ranges': tile_position_ranges, # { position: (min, max) } 'require_start_on_ground': require_start_on_ground, # bool 'require_goal_on_ground': require_goal_on_ground, # bool 'num_gaps_range': num_gaps_range, # (min, max) 'require_all_platforms_reachable': require_all_platforms_reachable, # bool 'require_all_bonus_tiles_reachable': require_all_bonus_tiles_reachable # bool }
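get_solver_config parses each range string such as "(0, 10)" with eval and then clamps it through setup_tile_freq_range, whose definition is not shown here. The helper below is a hypothetical stand-in, assuming it simply bounds the requested (min, max) pair to the allowed [lo, hi] window and rejects inverted ranges; the real helper may behave differently.

def setup_tile_freq_range(label, min_value, max_value, lo, hi):
    # Hypothetical version of the helper used above: clamp the requested
    # range to [lo, hi] and reject ranges whose minimum exceeds the maximum.
    min_value = max(lo, min_value)
    max_value = min(hi, max_value)
    if min_value > max_value:
        raise ValueError('invalid %s range: (%d, %d)' % (label, min_value, max_value))
    return min_value, max_value

# Example: setup_tile_freq_range('block', -5, 200, 0, 100) -> (0, 100)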
def deploy_manager_sources(): """Deploys all manager sources from a single archive. """ archive_path = ctx.node.properties['manager_resources_package'] archive_checksum_path = \ ctx.node.properties['manager_resources_package_checksum_file'] skip_checksum_validation = ctx.node.properties['skip_checksum_validation'] if archive_path: sources_agents_path = os.path.join(utils.CLOUDIFY_SOURCES_PATH, 'agents') agent_archives_path = utils.AGENT_ARCHIVES_PATH utils.mkdir(agent_archives_path) # this will leave this several hundreds of MBs archive on the # manager. should find a way to clean it after all operations # were completed and bootstrap succeeded as it is no longer # necessary utils.mkdir(RESOURCES_DIR) res_name = os.path.basename(archive_path) destination = os.path.join(RESOURCES_DIR, res_name) resources_archive_path = \ utils.download_cloudify_resource(archive_path, NODE_NAME, destination=destination) # This would ideally go under utils.download_cloudify_resource but as # of now, we'll only be validating the manager resources package. if not skip_checksum_validation: skip_if_failed = False if not archive_checksum_path: skip_if_failed = True archive_checksum_path = archive_path + '.md5' md5_name = os.path.basename(archive_checksum_path) destination = os.path.join(RESOURCES_DIR, md5_name) resources_archive_md5_path = \ utils.download_cloudify_resource(archive_checksum_path, NODE_NAME, destination=destination) if not utils.validate_md5_checksum(resources_archive_path, resources_archive_md5_path): if skip_if_failed: ctx.logger.warn('Checksum validation failed. ' 'Continuing as no checksum file was ' 'explicitly provided.') else: utils.error_exit( 'Failed to validate checksum for {0}'.format( resources_archive_path)) else: ctx.logger.info('Resources Package downloaded successfully...') else: ctx.logger.info( 'Skipping resources package checksum validation...') utils.untar(resources_archive_path, utils.CLOUDIFY_SOURCES_PATH, skip_old_files=True) def splitext(filename): # not using os.path.splitext as it would return .gz instead of # .tar.gz if filename.endswith('.tar.gz'): return '.tar.gz' elif filename.endswith('.exe'): return '.exe' else: utils.error_exit( 'Unknown agent format for {0}. ' 'Must be either tar.gz or exe'.format(filename)) def normalize_agent_name(filename): # this returns the normalized name of an agent upon which our agent # installer retrieves agent packages for installation. # e.g. Ubuntu-trusty-agent_3.4.0-m3-b392.tar.gz returns # ubuntu-trusty-agent return filename.split('_', 1)[0].lower() for agent_file in os.listdir(sources_agents_path): agent_id = normalize_agent_name(agent_file) agent_extension = splitext(agent_file) utils.move( os.path.join(sources_agents_path, agent_file), os.path.join(agent_archives_path, agent_id + agent_extension))
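deploy_manager_sources validates the downloaded archive against a sibling .md5 file via utils.validate_md5_checksum, which is not shown here. A minimal sketch of how such a check is commonly implemented (hash the archive in chunks and compare against the first token of the .md5 file) might look as follows; this is illustrative only and may not match the actual utility.

import hashlib

def validate_md5_checksum(resource_path, md5_checksum_file_path):
    # Compute the archive's MD5 incrementally and compare it with the checksum
    # stored as the first whitespace-separated token of the .md5 file.
    md5 = hashlib.md5()
    with open(resource_path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            md5.update(chunk)
    with open(md5_checksum_file_path) as f:
        expected = f.read().split()[0].strip()
    return md5.hexdigest() == expected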
def get_trueFIs(exp_res_filename, eval_res_filename, min_freq, delta, pvalue_mode, first_epsilon=1.0): """ Compute the True Frequent Itemsets using the 'holdout-VC' method with the binomial test TODO Add more details.""" stats = dict() with open(exp_res_filename) as FILE: size_line = FILE.readline() try: size_str = size_line.split("(")[1].split(")")[0] except IndexError: utils.error_exit( "Cannot compute size of the explore dataset: '{}' is not in the recognized format\n" .format(size_line)) try: stats['exp_size'] = int(size_str) except ValueError: utils.error_exit( "Cannot compute size of the explore dataset: {} is not a number\n" .format(size_str)) with open(eval_res_filename) as FILE: size_line = FILE.readline() try: size_str = size_line.split("(")[1].split(")")[0] except IndexError: utils.error_exit( "Cannot compute size of the eval dataset: '{}' is not in the recognized format\n" .format(size_line)) try: stats['eval_size'] = int(size_str) except ValueError: utils.error_exit( "Cannot compute size of the eval dataset: '{}' is not a number\n" .format(size_str)) stats['orig_size'] = stats['exp_size'] + stats['eval_size'] exp_res = utils.create_results(exp_res_filename, min_freq) stats['exp_res'] = len(exp_res) exp_res_set = set(exp_res.keys()) eval_res = utils.create_results(eval_res_filename, min_freq) stats['eval_res'] = len(eval_res) eval_res_set = set(eval_res.keys()) intersection = exp_res_set & eval_res_set stats['holdout_intersection'] = len(intersection) stats['holdout_false_negatives'] = len(exp_res_set - eval_res_set) stats['holdout_false_positives'] = len(eval_res_set - exp_res_set) stats['holdout_jaccard'] = len(intersection) / len(exp_res_set | eval_res_set) # One may want to play with giving different values for the different error # probabilities, but there isn't really much point in it. stats['lowered_delta'] = 1.0 - math.sqrt(1 - delta) stats['filter_epsilon'] = first_epsilon sys.stderr.write("Computing candidates...") sys.stderr.flush() freq_bound = min_freq + stats['filter_epsilon'] exp_res_filtered = set() exp_res_filtered_items = set() trueFIs = dict() for itemset in exp_res: if exp_res[itemset] < freq_bound: exp_res_filtered.add(itemset) exp_res_filtered_items |= itemset else: # Add itemsets with frequency at last freq_bound to the TFIs trueFIs[itemset] = exp_res[itemset] sys.stderr.write("done: {} exp_res_filtered ({} items)\n".format( len(exp_res_filtered), len(exp_res_filtered_items))) sys.stderr.flush() stats['tfis_from_exp'] = len(trueFIs) stats['exp_res_filtered'] = len(exp_res_filtered) supposed_freq = (math.ceil(stats['orig_size'] * min_freq) - 1) / stats['orig_size'] if stats['exp_res_filtered'] > 0: eval_res = utils.create_results(eval_res_filename, min_freq) eval_res_set = set(eval_res.keys()) stats['eval_res'] = len(eval_res) intersection = exp_res_filtered & eval_res_set stats['holdout_intersection'] = len(intersection) stats['holdout_false_negatives'] = len(exp_res_filtered - eval_res_set) # Bonferroni correction (Union bound). We work in the log space. 
stats['critical_value'] = math.log(stats['lowered_delta']) - math.log( stats['exp_res_filtered']) # Add TFIs from eval last_accepted_freq = 1.0 last_non_accepted_freq = min_freq for itemset in sorted(intersection, key=lambda x: eval_res[x], reverse=True): p_value = utils.pvalue(pvalue_mode, eval_res[itemset], stats['eval_size'], supposed_freq) if p_value <= stats['critical_value']: trueFIs[itemset] = eval_res[itemset] last_accepted_freq = eval_res[itemset] else: last_non_accepted_freq = eval_res[itemset] break # Compute epsilon for the binomial min_diff = 5e-6 # controls when to stop the binary search while last_accepted_freq - last_non_accepted_freq > min_diff: mid_point = (last_accepted_freq - last_non_accepted_freq) / 2 test_freq = last_non_accepted_freq + mid_point p_value = utils.pvalue(pvalue_mode, test_freq, stats['eval_size'], supposed_freq) if p_value <= stats['critical_value']: last_accepted_freq = test_freq else: last_non_accepted_freq = test_freq stats['epsilon'] = last_non_accepted_freq + ( (last_accepted_freq - last_non_accepted_freq) / 2) - min_freq stats['removed'] = len(intersection) - len(trueFIs) else: # stats['exp_res_filtered'] == 0 stats['eval_res'] = 0 stats['holdout_false_negatives'] = 0 stats['holdout_intersection'] = 0 stats['critical_value'] = 0 stats['epsilon'] = 0 stats['removed'] = 0 return (trueFIs, stats)
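The tail of get_trueFIs binary-searches the frequency at which the (log) p-value crosses the Bonferroni-corrected critical value, and reports the resulting threshold (minus min_freq) as epsilon. The standalone sketch below reproduces just that search, with a caller-supplied log_pvalue function standing in for utils.pvalue; the parameter names are assumptions based on the calls above.

def find_threshold_freq(log_pvalue, lower, upper, critical_value, min_diff=5e-6):
    # Binary search between the highest rejected frequency (lower) and the
    # lowest accepted one (upper): frequencies whose log p-value is at most
    # critical_value are "accepted", mirroring the loop in get_trueFIs.
    while upper - lower > min_diff:
        mid = lower + (upper - lower) / 2
        if log_pvalue(mid) <= critical_value:
            upper = mid
        else:
            lower = mid
    # Return the midpoint of the final bracket as the estimated threshold.
    return lower + (upper - lower) / 2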
def main(): if len(sys.argv) != 7: utils.error_exit( " ".join( ("USAGE: {}".format(os.path.basename(sys.argv[0])), "use_additional_knowledge={{0|1}} delta min_freq gap dataset", "results_filename\n"))) dataset = sys.argv[5] res_filename = os.path.expanduser(sys.argv[6]) if not os.path.isfile(res_filename): utils.error_exit( "{} does not exist, or is not a file\n".format(res_filename)) try: use_additional_knowledge = int(sys.argv[1]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[1])) try: delta = float(sys.argv[2]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[2])) try: min_freq = float(sys.argv[3]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[3])) try: gap = float(sys.argv[4]) except ValueError: utils.error_exit("{} is not a number\n".format(sys.argv[4])) ds_stats = getDatasetInfo.get_ds_stats(dataset) (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta, gap, use_additional_knowledge) utils.print_itemsets(trueFIs, ds_stats['size']) sys.stderr.write( ",".join( ("res_file={}".format(os.path.basename(res_filename)), "use_add_knowl={}".format(use_additional_knowledge), "e1={},e2={}".format(stats['epsilon_1'], stats['epsilon_2']), "d={}".format(delta), "min_freq={},trueFIs={}\n".format(min_freq, len(trueFIs))))) sys.stderr.write( ",".join( ("base_set={}".format(stats['base_set']), "maximal_itemsets={}".format(stats['maximal_itemsets']), "negbor={}".format(stats['negative_border']), "emp_vc_dim={}".format(stats['emp_vc_dim']), "not_emp_vc_dim={}\n".format(stats['not_emp_vc_dim'])))) sys.stderr.write( ",".join( ("res_file,add_knowl,e1,e2,delta,min_freq,trueFIs", "base_set,maximal_itemsets,negative_border,emp_vc_dim", "not_emp_vc_dim\n"))) sys.stderr.write("{}\n".format( ",".join((str(i) for i in ( os.path.basename(res_filename), use_additional_knowledge, stats['epsilon_1'], stats['epsilon_2'], delta, min_freq, len(trueFIs), stats['base_set'], stats['maximal_itemsets'], stats['negative_border'], stats['emp_vc_dim'], stats['not_emp_vc_dim'])))))
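main() repeats the same try/except ValueError block for every numeric command-line argument. A small hypothetical helper such as the one below captures that pattern; sys.exit stands in for utils.error_exit so the sketch stays self-contained.

import sys

def parse_number_or_exit(value, cast=float):
    # Convert a command-line token to a number, exiting with the same style
    # of message used above when the token is not numeric.
    try:
        return cast(value)
    except ValueError:
        sys.exit("{} is not a number\n".format(value))

# Example: delta = parse_number_or_exit(sys.argv[2]); k = parse_number_or_exit(sys.argv[1], cast=int)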
def main(game, levels, process, solve, trial, max_sol, threads): if process: print("----- PROCESSING -----") process_dir = utils.get_directory("process_console_output") for level in levels: process_file = utils.get_filepath(process_dir, "%s.txt" % level) os.system( "(time pypy3 main.py platformer %s %s --process) > %s 2>&1" % (game, level, process_file)) os.system( "(time python main.py platformer %s %s --gen_prolog) >> %s 2>&1" % (game, level, process_file)) print("Saved to: %s" % process_file) if solve: print("----- SOLVING -----") config_formats = TRIAL_CONFIG_FORMATS.get(trial) if config_formats is None: utils.error_exit("--trial must be one of %s" % str(list(TRIAL_CONFIG_FORMATS.keys()))) prolog_file_format = "level_saved_files_block/prolog_files/%s.pl" level_structural_txt_file_format = "level_structural_layers/generated/%s.txt" level_model_str_file_format = "level_saved_files_block/generated_level_model_strs/%s.txt" level_assignments_dict_file_format = "level_saved_files_block/generated_level_assignments_dicts/%s.pickle" level_valid_path_file_format = "level_saved_files_block/generated_level_paths/%s.pickle" level_state_graph_file_format = "level_saved_files_block/enumerated_state_graphs/generated/%s.gpickle" solve_dir = utils.get_directory("solver_console_output") sol_order = list(range(max_sol)) sol_order.reverse() for sol in sol_order: for config_file_format in config_formats: for level in levels: prolog_file = prolog_file_format % level prolog_filename = utils.get_basepath_filename( prolog_file, 'pl') config_file = config_file_format % (game, level) config_filename = utils.get_basepath_filename( config_file, 'json') answer_set_filename_format = '_'.join( [prolog_filename, config_filename, 'a%d']) cur_answer_set_filename = answer_set_filename_format % sol default_answer_set_filename = answer_set_filename_format % 0 solve_file = utils.get_filepath( "%s/%s/" % (solve_dir, level), "%s.txt" % cur_answer_set_filename) os.system( "(time python run_solver.py %s %s --max_sol 1 --threads %d --save --validate) > %s 2>&1" % (prolog_file, config_file, threads, solve_file)) print("Saved to: %s" % solve_file) if sol != 0 and os.path.exists( level_structural_txt_file_format % default_answer_set_filename): os.system("mv %s %s" % (level_structural_txt_file_format % default_answer_set_filename, level_structural_txt_file_format % cur_answer_set_filename)) if sol != 0 and os.path.exists( level_assignments_dict_file_format % default_answer_set_filename): os.system("mv %s %s" % (level_assignments_dict_file_format % default_answer_set_filename, level_assignments_dict_file_format % cur_answer_set_filename)) if sol != 0 and os.path.exists( level_model_str_file_format % default_answer_set_filename): os.system("mv %s %s" % (level_model_str_file_format % default_answer_set_filename, level_model_str_file_format % cur_answer_set_filename)) if sol != 0 and os.path.exists( level_valid_path_file_format % default_answer_set_filename): os.system("mv %s %s" % (level_valid_path_file_format % default_answer_set_filename, level_valid_path_file_format % cur_answer_set_filename)) if sol != 0 and os.path.exists( level_state_graph_file_format % default_answer_set_filename): os.system("mv %s %s" % (level_state_graph_file_format % default_answer_set_filename, level_state_graph_file_format % cur_answer_set_filename)) if os.path.exists(level_structural_txt_file_format % cur_answer_set_filename): print("Level txt path: %s" % level_structural_txt_file_format % cur_answer_set_filename)
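The solve branch renames each generated artifact by shelling out to mv through os.system. The helper below is only a sketch of that same rename step using shutil, shown to make the repeated "if the default-named file exists, move it to the solution-specific name" pattern explicit; it is not part of the original script.

import os
import shutil

def rename_generated_artifact(path_format, default_name, current_name):
    # Move e.g. level_structural_layers/generated/<default>.txt to the
    # solution-specific name, if the default-named file exists.
    src = path_format % default_name
    dst = path_format % current_name
    if os.path.exists(src):
        shutil.move(src, dst)
        return True
    return False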