Example #1
def patch(ngx_src, patch_path):
    '''patch nginx'''
    work_dir = os.path.abspath(os.curdir)
    os.chdir(ngx_src)
    abs_patch_path = os.path.abspath(patch_path)
    cmds = ['patch -p0 -t -s < ' + abs_patch_path,
            'patch -p1 -t -s < ' + abs_patch_path,
            'patch -p2 -t -s < ' + abs_patch_path,
            'patch -p3 -t -s < ' + abs_patch_path]

    for cmd in cmds:
        print cmd
        f = os.popen(cmd)
        output = f.read()
        if 'Skipping' in output:
            continue
        else:
            os.chdir(work_dir)
            return
        
    os.chdir(work_dir)
    answer = raw_input("Patch failed, continue? (yes/no) ")
    if answer == "yes":
        print "continuing..."
        return
    else:
        error_exit('Patch failed!')
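
A minimal usage sketch (paths are hypothetical): the function tries strip
levels -p0 through -p3 until one applies without skipped hunks.

# Assumed: an unpacked nginx source tree and a module patch file on disk.
patch('/usr/local/src/nginx-1.20.1', './patches/ngx_module.patch')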
Example #2
def main():

    es_endpoint_ip = ctx.node.properties['es_endpoint_ip']
    es_endpoint_port = ctx.node.properties['es_endpoint_port']

    if not es_endpoint_ip:
        es_endpoint_ip = ctx.instance.host_ip
        _install_elasticsearch()

        utils.systemd.start('elasticsearch')
        utils.wait_for_port(es_endpoint_port, es_endpoint_ip)
        _configure_elasticsearch(host=es_endpoint_ip, port=es_endpoint_port)

        utils.systemd.stop('elasticsearch')
        utils.clean_var_log_dir('elasticsearch')
    else:
        ctx.logger.info('External Elasticsearch Endpoint provided: '
                        '{0}:{1}...'.format(es_endpoint_ip, es_endpoint_port))
        time.sleep(5)
        utils.wait_for_port(es_endpoint_port, es_endpoint_ip)
        ctx.logger.info('Checking if \'cloudify_storage\' '
                        'index already exists...')

        if http_request('http://{0}:{1}/cloudify_storage'.format(
                es_endpoint_ip, es_endpoint_port), method='HEAD'):
            utils.error_exit('\'cloudify_storage\' index already exists on '
                             '{0}, terminating bootstrap...'.format(
                                 es_endpoint_ip))
        _configure_elasticsearch(host=es_endpoint_ip, port=es_endpoint_port)

    ctx.instance.runtime_properties['es_endpoint_ip'] = es_endpoint_ip
Example #3
    def resumable_upload(self, insert_request):
        '''
        This method implements an exponential backoff strategy to resume a
        failed upload.
        '''
        response = None
        error = None
        retry = 0
        while response is None:
            try:
                status, response = insert_request.next_chunk()
                if 'id' in response:
                    print('''Video ID `%s' was successfully uploaded. \
Its visibility is set to `%s'.''' % (response['id'], self.settings['privacy']))
                    print('''URL of the newly uploaded video: \
<https://www.youtube.com/watch?v=%s>''' % response['id'])
                    print('''It may take some time for the video to \
finish processing; typically 1-10 minutes.''')
                else:
                    error_exit('''The upload failed with an unexpected \
response: %s''' % response)
            except HttpError, e:
                if e.resp.status in self.retriable_status_codes:
                    error = '''A retriable HTTP error %d occurred:\n%s''' % (
                        e.resp.status, e.content
                    )
                else:
                    raise
            except self.retriable_exceptions, e:
                error = 'A retriable error occurred: %s' % e

            # Standard exponential-backoff tail; MAX_RETRIES, random, and
            # time are assumed to be available in the original module.
            if error is not None:
                print(error)
                retry += 1
                if retry > MAX_RETRIES:
                    error_exit('No longer attempting to retry.')
                sleep_seconds = random.random() * (2 ** retry)
                print('Sleeping %f seconds and then retrying...' % sleep_seconds)
                time.sleep(sleep_seconds)
                error = None
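
A hedged sketch of how such an insert_request is typically built with the
Google API Python client; the service object, file names, and metadata here
are assumptions, not taken from this snippet:

# Assumed: `youtube` is an authenticated googleapiclient service object and
# `uploader` is the instance that owns resumable_upload().
from googleapiclient.http import MediaFileUpload

insert_request = youtube.videos().insert(
    part='snippet,status',
    body={'snippet': {'title': 'My tune'},
          'status': {'privacyStatus': 'unlisted'}},
    media_body=MediaFileUpload('out.mp4', chunksize=-1, resumable=True))
uploader.resumable_upload(insert_request)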
Example #4
def _set_security(rabbitmq_ssl_enabled,
                  rabbitmq_cert_private,
                  rabbitmq_cert_public):
    # Deploy certificates if both have been provided.
    # Complain loudly if one has been provided and the other hasn't.
    if rabbitmq_ssl_enabled:
        if rabbitmq_cert_private and rabbitmq_cert_public:
            utils.deploy_ssl_certificate(
                'private', '/etc/rabbitmq/rabbit-priv.pem',
                'rabbitmq', rabbitmq_cert_private)
            utils.deploy_ssl_certificate(
                'public', '/etc/rabbitmq/rabbit-pub.pem',
                'rabbitmq', rabbitmq_cert_public)
            # Configure for SSL
            utils.deploy_blueprint_resource(
                '{0}/rabbitmq.config-ssl'.format(CONFIG_PATH),
                '/etc/rabbitmq/rabbitmq.config')
        else:
            utils.error_exit('When providing a certificate for rabbitmq, '
                             'both public and private certificates must be '
                             'supplied.')
    else:
        utils.deploy_blueprint_resource(
            '{0}/rabbitmq.config-nossl'.format(CONFIG_PATH),
            '/etc/rabbitmq/rabbitmq.config')
        if rabbitmq_cert_private or rabbitmq_cert_public:
            ctx.logger.warn('Broker SSL cert supplied but SSL not enabled '
                            '(broker_ssl_enabled is False).')
Example #5
def process(arguments):
    access_log = arguments['--access-log']
    log_format = arguments['--log-format']
    if access_log is None and not sys.stdin.isatty():
        # assume logs can be fetched directly from stdin when piped
        access_log = 'stdin'
    if access_log is None:
        access_log, log_format = detect_log_config(arguments)

    logging.info('access_log: %s', access_log)
    logging.info('log_format: %s', log_format)
    if access_log != 'stdin' and not os.path.exists(access_log):
        error_exit('access log file "%s" does not exist' % access_log)

    if arguments['info']:
        print('nginx configuration file:\n ', detect_config_path())
        print('access log file:\n ', access_log)
        print('access log format:\n ', log_format)
        print('available variables:\n ', ', '.join(sorted(extract_variables(log_format))))
        return

    source = build_source(access_log, arguments)
    pattern = build_pattern(log_format)
    processor = build_processor(arguments)
    setup_reporter(processor, arguments)
    process_log(source, pattern, processor, arguments)
Example #6
    def load(self, json_path):
        if json_path is None:
            json_path = self.json_path
        if not os.path.exists(json_path):
            error_exit("JSON file not found!")
        with open(json_path, 'r') as f:
            self.info = json.load(f)
Example #7
    def publish(self, server):
        data = json.dumps(self.info)
        try:
            req = urllib2.Request(server)
            response = urllib2.urlopen(req, data)
        except Exception as e:
            print e
            error_exit("Publish failed")

        return response.read()
Example #8
def install_logstash():

    logstash_unit_override = '/etc/systemd/system/logstash.service.d'

    logstash_source_url = ctx.node.properties['logstash_rpm_source_url']

    rabbitmq_username = ctx.node.properties['rabbitmq_username']
    rabbitmq_password = ctx.node.properties['rabbitmq_password']

    logstash_log_path = '/var/log/cloudify/logstash'
    logstash_conf_path = '/etc/logstash/conf.d'

    # injected as an input to the script
    ctx.instance.runtime_properties['es_endpoint_ip'] = \
        os.environ.get('ES_ENDPOINT_IP')
    ctx.instance.runtime_properties['rabbitmq_endpoint_ip'] = \
        utils.get_rabbitmq_endpoint_ip()

    # Confirm username and password have been supplied for broker before
    # continuing.
    # Components other than logstash and riemann have this handled in code.
    # Note that these are not directly used in this script, but are used by the
    # deployed resources, hence the check here.
    if not rabbitmq_username or not rabbitmq_password:
        utils.error_exit(
            'Both rabbitmq_username and rabbitmq_password must be supplied '
            'and at least 1 character long in the manager blueprint inputs.')

    ctx.logger.info('Installing Logstash...')
    utils.set_selinux_permissive()
    utils.copy_notice('logstash')

    utils.yum_install(logstash_source_url)

    utils.mkdir(logstash_log_path)
    utils.chown('logstash', 'logstash', logstash_log_path)

    ctx.logger.info('Creating systemd unit override...')
    utils.mkdir(logstash_unit_override)
    utils.deploy_blueprint_resource(
        '{0}/restart.conf'.format(CONFIG_PATH),
        '{0}/restart.conf'.format(logstash_unit_override))
    ctx.logger.info('Deploying Logstash conf...')
    utils.deploy_blueprint_resource(
        '{0}/logstash.conf'.format(CONFIG_PATH),
        '{0}/logstash.conf'.format(logstash_conf_path))

    ctx.logger.info('Deploying Logstash sysconfig...')
    utils.deploy_blueprint_resource(
        '{0}/logstash'.format(CONFIG_PATH),
        '/etc/sysconfig/logstash')

    utils.logrotate('logstash')
    utils.sudo(['/sbin/chkconfig', 'logstash', 'on'])
    utils.clean_var_log_dir('logstash')
Example #9
def _get_path_from_url(url):
    """
    Get the local path name for a repo.
    [url] must be a git URL.
    """
    try:
        repo = re.split("/", url)[-1]
        name = re.split(r"\.", repo)[0]
    except IndexError:
        error_exit("git url error, check the git url.")

    return name
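
For example (hypothetical URL), the last path component is split on '.' so
the '.git' suffix is dropped:

# _get_path_from_url('https://github.com/user/repo.git')  ->  'repo'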
Example #10
    def __parse_wiki(self,url):
        """
        parse the wiki site to get information
        """
        print "Begin to get information on wiki site, please wait ........."
        req = urllib2.Request(url)
        try:
            html = urllib2.urlopen(req, timeout=20).read()
        except Exception:
            error_exit("url open error!\n")

        # TODO: handle encoding
        content = BeautifulSoup(html)
        table = content.select("[class~=modules-index-table]")[0]
        module_table = []
        ptrs = table.find_all("tr")
        for ptr in ptrs[1:]:
            tds = ptr.find_all("td")
            detail = dict() 
            if tds[0].a.string:
                detail["name"] = tds[0].a.string.strip()
            else:
                detail["name"] = None

            if tds[1].string:
                detail["description"] = tds[1].string.strip()
            else:
                detail["description"] = None

            #author or authors
            atags = tds[2].find_all('a')
            author = ''
            if len(atags):
                for atag in atags:
                    author += atag.string + ' '
            else:
                author = tds[2].string
            detail["author"] = author.strip()

            if len(tds) == 4 and tds[3].find_all("a"):
                if tds[3].a["href"].startswith("/File"):
                    detail["link"] = "http://wiki.nginx.org" + tds[3].a["href"]
                else:
                    detail["link"] = tds[3].a["href"]
            else:
                detail["link"] = None

            module_table.append(detail)

        return module_table
Example #11
def detect_custom_log(arguments):
    """
    Get the custom log format specified in a custom config file
    :return: log format
    """
    custom_log = arguments['--custom-log-format']
    if not os.path.exists(custom_log):
        error_exit('Custom format config not found: %s' % custom_log)

    config = ConfigParser.ConfigParser()
    config.read(custom_log)
    log_format = config.get('log_format', 'log_format')
    log_format = log_format.replace('\n', '').replace('\'', '')
    return log_format
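
A sketch of the INI layout this reads (file name and format string are
hypothetical); both the section and the option are literally named
'log_format':

# custom_log.conf
# [log_format]
# log_format: '$remote_addr - $remote_user [$time_local] "$request"'
#
# detect_custom_log({'--custom-log-format': 'custom_log.conf'}) then returns
# the format string with quotes and newlines stripped.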
Example #12
def detect_log_config(arguments):
    """
    Detect access log config (path and format) of nginx. Offer user to select if multiple access logs are detected.
    :return: path and format of detected / selected access log
    """
    config = arguments['--config']
    if config is None:
        config = detect_config_path()
    if not os.path.exists(config):
        error_exit('Nginx config file not found: %s' % config)

    with open(config) as f:
        config_str = f.read()
    access_logs = dict(get_access_logs(config_str))
    if not access_logs:
        error_exit('Access log file is not provided and ngxtop cannot detect it from your config file (%s).' % config)

    log_formats = dict(get_log_formats(config_str))
    if len(access_logs) == 1:
        log_path, format_name = access_logs.items()[0]
        if format_name == 'combined':
            return log_path, LOG_FORMAT_COMBINED
        if format_name not in log_formats:
            error_exit('Incorrect format name set in config for access log file "%s"' % log_path)
        return log_path, log_formats[format_name]

    # multiple access logs configured, offer to select one
    print('Multiple access logs detected in configuration:')
    log_path = choose_one(access_logs.keys(), 'Select access log file to process: ')
    format_name = access_logs[log_path]
    if format_name not in log_formats:
        error_exit('Incorrect format name set in config for access log file "%s"' % log_path)
    return log_path, log_formats[format_name]
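
A hypothetical nginx.conf fragment of the kind this function parses:

# log_format main '$remote_addr - $remote_user [$time_local] "$request"';
# access_log /var/log/nginx/access.log main;
# access_log /var/log/nginx/api.log combined;
#
# With two access_log directives the user is prompted to pick one; the
# built-in 'combined' format maps to the LOG_FORMAT_COMBINED constant.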
Example #13
def main():
    if len(sys.argv) > 3 or len(sys.argv) < 2:
        utils.error_exit("Usage: {} [keyword] dataset\n".format(os.path.basename(sys.argv[0])))

    if len(sys.argv) == 3 and sys.argv[1] == "name":
        print(os.path.basename(sys.argv[-1]))
    else:
        stats = get_ds_stats(sys.argv[-1])
        if len(sys.argv) == 2:
            print("'{}': {},".format(os.path.basename(sys.argv[-1]), stats))
        else:
            if sys.argv[1] in stats:
                print(stats[sys.argv[1]])
            else:
                utils.error_exit("Keyword '{}' not recognized\n".format(sys.argv[1]))
Example #14
def get_ds_stats(dataset, force_compute=False):
    """ Return a dict containing the statistics about the dataset.

    Look up 'dataset' in datasetsinfo.ds_stats. If present, return that dict,
    otherwise, compute the stats.

    See the comment at the beginning of compute_ds_stats() for info about the
    dict."""

    if dataset in datasetsinfo.ds_stats and force_compute == False:
        return datasetsinfo.ds_stats[dataset]
    else:
        if not os.path.isfile(dataset):
            utils.error_exit("{} not found in datasetsinfo.py and does not exist or is not a file\n".format(dataset))
        return compute_ds_stats(dataset)
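
A short usage note: results cached in datasetsinfo.ds_stats are returned
directly; recomputation can be forced (file name hypothetical):

# get_ds_stats('data/retail.dat', force_compute=True)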
Example #15
    def load(self):
        if not os.path.exists(self.yaml):
            error_exit("Cannot find yaml file!")

        try:
            info = yaml.load(open(self.yaml, 'r'))
        except yaml.scanner.ScannerError:
            raise PulishError("Wrong format")

        for item in info.keys():
            try:
                self._check_item(info,item,self.rules[item])
            except PulishError as e:
                error_exit(e.message)

        return info
Example #16
def compile_with_dso(module_src_path,
                     dso_path='/usr/local/nginx/sbin/dso_tool',
                     ngx_include_src='/usr/local/nginx/include'):

    print "Compiling with dso tool"
    cmd = (dso_path + ' --add-module=' + module_src_path +
           ' --nginx-include=' + ngx_include_src)
    try:
        proc = subprocess.Popen(cmd.split(),stdout=open(os.devnull,'w'), stderr=PIPE)
    except Exception as e:
        error_exit(e.message)
        
    stdout, stderr = proc.communicate()
    output = stderr.decode('utf-8')
    if 'error' in output:
        err_msg = re.split('error:',output)[-1]
        error_exit(err_msg)

    print "Dso Config Success!"
Example #17
    def upload_tune(self, audio, image, args, video_ready=False):
        '''
        Uploads a video to Youtube.
        '''
        if not video_ready:
            self.generate_video(audio, image)

        if self.settings['generate_only']:
            print('Skipping Youtube upload.')
            exit()

        # Now upload the file to Youtube.
        print('Authenticating using the Youtube API...')
        try:
            youtube = self.get_authenticated_service(args)
        except httplib2.ServerNotFoundError, e:
            error_exit('%s.' % e)
Example #18
def main():
    global sample_size 
    global population_size
    global dataset
    # Verify arguments
    if len(sys.argv) != 3: 
        utils.error_exit("Usage: {} samplesize dataset\n".format(os.path.basename(sys.argv[0])))
    dataset = sys.argv[2]
    try:
        sample_size = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))

    ds_stats = getDatasetInfo.get_stats(dataset)
    population_size = ds_stats['size']

    random.seed()

    t = Timer("create_sample()", "from __main__ import create_sample")
    sys.stderr.write("Creating the sample took: {} ms \n".format(t.timeit(1) * 1000))
Example #19
    def get_access_log(self):
        """
        Get nginx access.log file path
        :return: access.log file path and log format
        """
        if self.access_log is not None:
            # Return the cached pair; assumes self.log_format is cached
            # alongside self.access_log (both are set together below).
            return self.access_log, self.log_format

        self.access_log = self.arguments['--access-log']
        log_format = self.arguments['--log-format']
        if self.access_log is None and not sys.stdin.isatty():
            # assume logs can be fetched directly from stdin when piped
            self.access_log = 'stdin'
        if self.access_log is None:
            self.access_log, log_format = detect_log_config(self.arguments)

        logging.info('access_log: %s', self.access_log)
        logging.info('log_format: %s', log_format)
        if self.access_log != 'stdin' and not os.path.exists(self.access_log):
            error_exit('access log file "%s" does not exist' % self.access_log)
        self.log_format = log_format
        return self.access_log, log_format
Example #20
def detect_config_path():
    """
    Get nginx configuration file path based on `nginx -V` output
    :return: detected nginx configuration file path
    """
    try:
        proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
    except OSError:
        error_exit('Access log file or format was not set and nginx config file cannot be detected. ' +
                   'Perhaps nginx is not in your PATH?')

    stdout, stderr = proc.communicate()
    version_output = stderr.decode('utf-8')
    conf_path_match = re.search(r'--conf-path=(\S*)', version_output)
    if conf_path_match is not None:
        return conf_path_match.group(1)

    prefix_match = re.search(r'--prefix=(\S*)', version_output)
    if prefix_match is not None:
        return prefix_match.group(1) + '/conf/nginx.conf'
    return '/etc/nginx/nginx.conf'
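
For reference, `nginx -V` writes its build information to stderr; a
hypothetical output and the resulting detection:

# nginx version: nginx/1.20.1
# configure arguments: --prefix=/etc/nginx --conf-path=/etc/nginx/nginx.conf ...
#
# Here the --conf-path match wins, so '/etc/nginx/nginx.conf' is returned;
# without it the --prefix value plus '/conf/nginx.conf' is used, and
# '/etc/nginx/nginx.conf' is the final fallback.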
Example #21
def main():
    """ Partition a dataset in two equal parts. """
    # Verify arguments
    if len(sys.argv) != 5: 
        utils.error_exit("Usage: {} dataset_size dataset_file expl_file eval_file\n".format(os.path.basename(sys.argv[0])))
    dataset = sys.argv[2]
    expl = sys.argv[3]
    eval = sys.argv[4]
    try:
        dataset_size = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))

    random.seed()
    expl_lines = frozenset(random.sample(range(dataset_size), dataset_size // 2))

    with open(dataset, "rt") as largeFILE, open(expl, "wt") as explFILE, open(eval, "wt") as evalFILE:
        index = 0
        for line in largeFILE:
            if index in expl_lines:
                explFILE.write(line)
            else:
                evalFILE.write(line)
            index += 1
Example #22
def main():
    if len(sys.argv) != 4:
        utils.error_exit("Usage: {} use_additional_knowledge={{0|1}} delta dataset\n".format(sys.argv[0]))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not an integer\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not an integer\n".format(sys.argv[2]))

    ds_stats = getDatasetInfo.get_ds_stats(sys.argv[3])

    (eps_vc_dim, eps_emp_vc_dim, returned) = epsilon_dataset(delta, ds_stats, use_additional_knowledge)

    print("{} {}".format(eps_vc_dim, eps_emp_vc_dim))
    print("{}\t{}".format(min(eps_vc_dim, eps_emp_vc_dim), returned))
Example #23
def compile_without_dso(module_src_path, ngx_src, prefix, other=''):
    '''
    Call the nginx configure script to compile nginx with the module source
    specified by [module_src_path].
    [prefix] is the install prefix of nginx/Tengine, /usr/local/nginx by default.
    [other] holds any extra configure options.
    '''
    config_path = os.path.join(ngx_src, 'configure')
    work_dir = os.path.abspath(os.curdir)
    os.chdir(ngx_src)

    if other is None:
        other = ''
    if not os.path.exists(config_path):
        error_exit('Path not found: ' + config_path)

    print 'configure nginx with module path: %s' % module_src_path

    # cover nginx binary or not
    first_install = False
    config_options = ''

    #for test: ngx3m reset
    # ignore the [other] and just do ./configure, make , make install
    if module_src_path is None:
        first_install = True
    else:
        config_options = ' --add-module=' + module_src_path
        if not os.path.exists(module_src_path):
            error_exit('Path not found: ' + module_src_path)
    
    if not os.path.exists(prefix):
        first_install = True
    cmd = config_path + ' --prefix=' + prefix + config_options + ' ' + other
    try:
        proc = subprocess.Popen(cmd.split(), stdout=PIPE, stderr=PIPE)
    except Exception as e:
        error_exit(e.message)
    stdout, stderr = proc.communicate()
    stderr_output = stderr.decode('utf-8')
    stdout_output = stdout.decode('utf-8')
    if 'error:' in stderr_output:
        err_msg = re.split('error:', stderr_output)[-1]
        error_exit(err_msg)
    elif 'error:' in stdout_output:
        err_msg = re.split('error:', stdout_output)[-1]
        error_exit(err_msg)
    elif "Permission denied" in stderr_output:
        error_exit("Permission denied")

    print "make ......."
    try:
        proc1 = subprocess.Popen(["make"], stdout=open(os.devnull,'w'), stderr=PIPE)
    except Exception as e:
        os.chdir(work_dir)
        error_exit(e.message)
    stdout, stderr = proc1.communicate()
    output = stderr.decode('utf-8')
    if "***" in output:
        print output
        os.chdir(work_dir)
        error_exit("make failed!")

    elif "Permission denied" in output:
        os.chdir(work_dir)
        error_exit("Permission denied")

    if first_install:
        print "make install......."
        try:
            proc2 = subprocess.Popen(["make","install"],stdout=open(os.devnull,'w'),stderr=PIPE)
        except Exception as e:
            os.chdir(work_dir)
            error_exit("Compile failed!")

        stdout, stderr = proc2.communicate()
        output = stderr.decode('utf-8')
        if "***" in output or "error" in output:
            print output
            os.chdir(work_dir)
            error_exit("make install failed!")

        elif "Permission denied" in output:
            os.chdir(work_dir)
            error_exit("Permission denied")
    else:
        ngx_path = os.path.join(prefix, "sbin/nginx")
        ngx_sbin_dir = os.path.join(prefix, "sbin")
        ngx_path_bak = os.path.join(prefix, "sbin/nginx.old")
        ngx_path_new = "./objs/nginx"
        os.rename(ngx_path, ngx_path_bak)
        shutil.copy(ngx_path_new, ngx_sbin_dir)
    os.chdir(work_dir)

    print "compile succeeded!"
    return config_options
Example #24
def check_tile_type_exists_in_prolog(tile_type, prolog_file_info, error_msg):
    if len(prolog_file_info.get('%s_tile_ids' % tile_type)) < 1:
        error_exit("tile type (%s) not found in prolog file; %s" %
                   (tile_type, error_msg))
Example #25
def get_ngx_info(prefix):

    ngx_info = dict()
    if prefix is None:
        print "try default nginx path: /usr/local/nginx/sbin/nginx"
        path = "/usr/local/nginx/sbin/nginx"
        prefix = "/usr/local/nginx"
    else:
        path = os.path.join(prefix, "sbin/nginx")
    ngx_info['prefix'] = prefix
    ngx_info['ngx_src'] = None
    try:
        proc = subprocess.Popen([path, '-V'], stderr=subprocess.PIPE)
    except Exception as e:
        print e
        error_exit('Cannot find nginx! Please use option -p/--prefix to specify the prefix of tengine!')

    stdout, stderr = proc.communicate()
    output = stderr.decode('utf-8')
    try:
        ngx_versions = re.findall(r'Tengine/(\S*)\s*[\(]nginx/(\S*)\)', output)[0]
    except IndexError:
        error_exit("Unknown tengine version! Please use option -p/--path to specify the path of tengine")
    ngx_info['versions'] = dict()
    ngx_info['versions']['tengine_version'] = ngx_versions[0]
    ngx_info['versions']['nginx_version'] = ngx_versions[1]

    ngx_info['config_arguments'] = []
    conf_args = re.search(r'configure arguments: (.*)', output)
    try:
        config_args_detail = conf_args.group(1)
        if config_args_detail.startswith("--prefix="):
            config_arguments = re.findall(
                r'^--prefix=[\S]*([\S\s]*)', config_args_detail)[0].split()
        else:
            config_arguments = config_args_detail.split()
        ngx_info['config_arguments'] = config_arguments
    except Exception as e:
        print e
        ngx_info['config_arguments'] = None


    # get modules' info and store in ngx_info['modules']
    ngx_info['modules'] = dict()
    try:
        modules = re.search(r'loaded modules:([\s\S]*)', output).group(1)
        module_list = re.findall(r'(ngx_[\S]*) \(([\S]*)[,\)]', modules)

        for module in module_list:
            ngx_info['modules'][str(module[0])] = dict()
            module_info = ngx_info['modules'][str(module[0])]
            module_info['static'] = (module[1] == 'static')
            module_info['wiki_name'] = None

            # only for modules installed with ngx3m do we know the version
            # and install time
            module_info['version'] = None
            module_info['install_time'] = None
    except AttributeError:
        ngx_info['modules'] = None

    return ngx_info
Example #26
def get_trueFIs(ds_stats,
                res_filename,
                min_freq,
                delta,
                gap=0.0,
                use_additional_knowledge=False):
    """ Compute the True Frequent Itemsets using the VC method we present in the
    paper.

    The parameter 'use_additional_knowledge' can be used to incorporate
    additional knowledge about the data generation process.

    'gap' controls how close to the optimal solution we ask the CPLEX solver to
    go. The right way to implement this would be to use a
    user-defined function in CPLEX.

    Returns a pair (trueFIs, stats).
    'trueFIs' is a dict whose keys are itemsets (frozensets) and values are
    frequencies. This collection of itemsets contains only TFIs with
    probability at least 1 - delta.
    'stats' is a dict containing various statistics used in computing the
    collection of itemsets."""

    stats = dict()

    # One may want to play with giving different values for the different error
    # probabilities, but there isn't really much point in it.
    lower_delta = 1.0 - math.sqrt(1 - delta)

    # Compute the maximum frequency of an itemset in the dataset
    with open(res_filename, 'rt') as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            error_exit(
                "Cannot compute size of the dataset: '{}' is not in the recognized format\n"
                .format(size_line))
        try:
            size = int(size_str)
        except ValueError:
            error_exit(
                "Cannot compute size of the dataset: '{}' is not a number\n".
                format(size_line.split("(")[1].split(")")[0]))
        max_freq_line = FILE.readline()
        if max_freq_line.find("(") > -1:
            tokens = max_freq_line.split("(")
            itemset = frozenset(map(int, tokens[0].split()))
            try:
                support = int(tokens[1][:-2])
            except ValueError:
                error_exit(
                    "Cannot compute the maximum frequency: '{}' is not a number\n"
                    .format(tokens[1][:-2]))
            max_freq = support / size
        else:
            error_exit(
                "Cannot compute the maximum frequency: '{}' is not in the recognized format\n"
                .format(max_freq_line))

    # Compute the first epsilon using results from the paper (Riondato and
    # Upfal 2014)
    # Incorporate or not 'previous knowledge' about generative process in
    # computation of the VC-dimension, depending on the option passed on the
    # command line
    (eps_vc_dim, eps_shatter,
     returned) = epsilon.epsilon_dataset(lower_delta, ds_stats,
                                         use_additional_knowledge, max_freq)
    stats['epsilon_1'] = min(eps_vc_dim, eps_shatter)

    items = ds_stats['items']
    items_num = len(items)
    lengths_dict = ds_stats['lengths']
    lengths = sorted(lengths_dict.keys(), reverse=True)

    # Extract the first (and largest) set of itemsets with frequency at least
    # min_freq - stats['epsilon_1']
    lower_bound_freq = min_freq - stats['epsilon_1'] - (1 / ds_stats['size'])
    freq_itemsets_1_dict = utils.create_results(res_filename, lower_bound_freq)
    freq_itemsets_1_set = frozenset(freq_itemsets_1_dict.keys())
    freq_itemsets_1_sorted = sorted(freq_itemsets_1_set,
                                    key=lambda x: freq_itemsets_1_dict[x])
    freq_items_1 = set()
    for itemset in freq_itemsets_1_set:
        if len(itemset) == 1:
            freq_items_1 |= itemset
    freq_items_1_num = len(freq_items_1)

    sys.stderr.write("First set of FI's: {} itemsets\n".format(
        len(freq_itemsets_1_set)))
    sys.stderr.flush()

    constr_start_str = "cplex.SparsePair(ind = ["
    constr_end_str = "], val = vals)"

    # Compute the "base set" (terrible name), that is the set of
    # itemsets with frequency < min_freq + epsilon_1 (but greater than min_freq
    # - stats['epsilon_1']. In the paper we call it \mathcal{G}.
    sys.stderr.write("Creating base set...")
    sys.stderr.flush()
    base_set = dict()
    # We use the maximum frequency in the base set to compute the epsilon
    max_freq_base_set = 0
    for itemset in freq_itemsets_1_sorted:
        if freq_itemsets_1_dict[itemset] < min_freq + stats['epsilon_1']:
            base_set[itemset] = freq_itemsets_1_dict[itemset]
            if freq_itemsets_1_dict[itemset] > max_freq_base_set:
                max_freq_base_set = freq_itemsets_1_dict[itemset]
        else:
            break
    stats['base_set'] = len(base_set)
    sys.stderr.write("done: {} itemsets\n".format(stats['base_set']))
    sys.stderr.flush()

    # Compute Closed Itemsets. We need them to compute the maximal.
    sys.stderr.write("Computing closed itemsets...")
    sys.stderr.flush()
    closed_itemsets = utils.get_closed_itemsets(base_set)
    closed_itemsets_len = len(closed_itemsets)
    sys.stderr.write(
        "done. Found {} closed itemsets\n".format(closed_itemsets_len))
    sys.stderr.flush()

    # Compute maximal itemsets. We will use them to compute the negative
    # border. An itemset is maximal frequent if none of its immediate supersets
    # is frequent.
    sys.stderr.write("Computing maximal itemsets...")
    sys.stderr.flush()
    maximal_itemsets_dict = utils.get_maximal_itemsets(closed_itemsets)
    maximal_itemsets = list(maximal_itemsets_dict.keys())
    stats['maximal_itemsets'] = len(maximal_itemsets)
    sys.stderr.write("done. Found {} maximal itemsets\n".format(
        stats['maximal_itemsets']))
    sys.stderr.flush()

    # Compute the negative border
    sys.stderr.write("Computing negative border...")
    sys.stderr.flush()
    negative_border = set()
    negative_border_items = set()
    # The idea is to look for "children" of maximal itemsets, and for
    # "siblings" of maximal itemsets
    for maximal in maximal_itemsets:
        for item_to_remove_from_maximal in maximal:
            reduced_maximal = maximal - frozenset([
                item_to_remove_from_maximal,
            ])
            for item in freq_items_1:
                if item in maximal:
                    continue
                # Create sibling
                candidate = reduced_maximal | frozenset([item])
                if candidate in freq_itemsets_1_set:
                    continue
                if candidate in negative_border:
                    continue
                to_add = True
                for item_to_remove in candidate:
                    subset = candidate - frozenset([item_to_remove])
                    if subset not in freq_itemsets_1_set:
                        to_add = False
                        break
                if to_add:
                    negative_border.add(candidate)
                    negative_border_items |= candidate
                if not to_add:
                    # if we added the sibling, there's no way we can add the
                    # child
                    candidate2 = maximal | frozenset([item])  # create child
                    if candidate2 in negative_border:
                        continue
                    to_add = True
                    for item_to_remove in candidate2:
                        subset = candidate2 - frozenset([item_to_remove])
                        if subset not in freq_itemsets_1_set:
                            to_add = False
                            break
                    if to_add:
                        negative_border.add(candidate2)
                        negative_border_items |= candidate2
    # We don't need to add the non-frequent-items because none of them (or
    # their supersets) will ever be included in the output, so at most we lose
    # some statistical power, but it's not a problem of avoiding false
    # positives.
    # for item in non_freq_items_1:
    #    negative_border.add(frozenset([item]))
    #    negative_border_items.add(item)
    original_negative_border_len = len(negative_border)
    sys.stderr.write(
        "done. Length now: {}\n".format(original_negative_border_len))
    sys.stderr.flush()

    # Add the "base set" to negative_border, so that it becomes a superset of
    # the "true" negative border (with some caveats about non-frequent single
    # items and their supersets, see comment above)
    sys.stderr.write("Adding base set...")
    sys.stderr.flush()
    for itemset in base_set:
        negative_border.add(itemset)
        negative_border_items |= itemset
    sys.stderr.write("done. Length now: {}\n".format(len(negative_border)))
    sys.stderr.flush()
    negative_border = sorted(negative_border, key=len, reverse=True)
    stats['negative_border'] = len(negative_border)
    negative_border_items_sorted = sorted(negative_border_items)

    # Create the graph that we will use to compute the chain constraints.
    # The nodes are the itemsets in negative_border. There is an edge between
    # two nodes if one is contained in the other or vice-versa.
    # Cliques on this graph are chains.
    sys.stderr.write("Creating graph...")
    sys.stderr.flush()
    graph = nx.Graph()
    graph.add_nodes_from(negative_border)
    sys.stderr.write("added nodes...adding edges...")
    sys.stderr.flush()

    negative_border_items_in_sets_dict = dict()
    negative_border_itemset_index = 0
    itemset_indexes_dict = dict()
    for first_itemset_index in range(stats['negative_border']):
        first_itemset = negative_border[first_itemset_index]
        for second_itemset_index in range(first_itemset_index + 1,
                                          stats['negative_border']):
            second_itemset = negative_border[second_itemset_index]
            if first_itemset < second_itemset or \
                    second_itemset < first_itemset:
                graph.add_edge(first_itemset, second_itemset)
        for item in first_itemset:
            if item in negative_border_items_in_sets_dict:
                negative_border_items_in_sets_dict[item].append(
                    negative_border_itemset_index)
            else:
                negative_border_items_in_sets_dict[item] = \
                    [negative_border_itemset_index, ]
        itemset_indexes_dict[first_itemset] = negative_border_itemset_index
        negative_border_itemset_index += 1
    sys.stderr.write("done\n")
    sys.stderr.flush()

    capacity = freq_items_1_num - 1
    if use_additional_knowledge and 2 * ds_stats['maxlen'] < capacity:
        sys.stderr.write("Lowering capacity={} to {}\n".format(
            capacity, 2 * ds_stats['maxlen']))
        sys.stderr.flush()
        capacity = 2 * ds_stats['maxlen']

    vars_num = stats['negative_border'] + len(negative_border_items)
    constr_names = []

    (tmpfile_handle, tmpfile_name) = tempfile.mkstemp(prefix="cplx",
                                                      dir=os.environ['PWD'],
                                                      text=True)
    os.close(tmpfile_handle)
    with open(tmpfile_name, 'wt') as cplex_script:
        cplex_script.write("capacity = {}\n".format(capacity))
        cplex_script.write("import cplex, os, sys\n")
        cplex_script.write("from cplex.exceptions import CplexError\n")
        cplex_script.write("\n")
        cplex_script.write("\n")
        cplex_script.write(" ".join(
            ("os.environ[\"ILOG_LICENSE_FILE\"] =",
             "\"/local/projects/cplex/ilm/site.access.ilm\"\n")))
        cplex_script.write("vals = [-1.0, 1.0]\n")
        cplex_script.write("sets_num = {}\n".format(stats['negative_border']))
        cplex_script.write("items_num = {}\n".format(
            len(negative_border_items)))
        cplex_script.write("vars_num = {}\n".format(vars_num))
        cplex_script.write("my_ub = [1.0] * vars_num\n")
        cplex_script.write(
            "my_types = \"\".join(\"I\" for i in range(vars_num))\n")
        cplex_script.write(
            "my_obj = ([1.0] * sets_num) + ([0.0] * items_num)\n")
        cplex_script.write(" ".join(
            ("my_colnames =",
             "[\"set{0}\".format(i) for i in range(sets_num)] +",
             "[\"item{0}\".format(j) for j in range(items_num)]\n")))
        cplex_script.write("rows = [ ")

        sys.stderr.write("Writing knapsack constraints...")
        sys.stderr.flush()
        constr_num = 0
        for item_index in range(len(negative_border_items)):
            try:
                for itemset_index in negative_border_items_in_sets_dict[
                        negative_border_items_sorted[item_index]]:
                    constr_str = "".join(
                        (constr_start_str, "\"set{}\",\"item{}\"".format(
                            itemset_index, item_index), constr_end_str))
                    cplex_script.write("{},".format(constr_str))
                    constr_num += 1
                    name = "s{}i{}".format(item_index, itemset_index)
                    constr_names.append(name)
            except KeyError:
                sys.stderr.write(" ".join(
                    ("item_index={}".format(item_index),
                     "neg_border_items_sorted[item_index]={}\n".format(
                         negative_border_items_sorted[item_index]))))
                sys.stderr.write("{} in items: {}\n".format(
                    negative_border_items_sorted[item_index],
                    negative_border_items_sorted[item_index] in items))
                sys.stderr.write("{} in freq_items_1: {}\n".format(
                    negative_border_items_sorted[item_index],
                    negative_border_items_sorted[item_index] in freq_items_1))
                non_freq_items_1 = items - freq_items_1
                sys.stderr.write("{} in non_freq_items_1: {}\n".format(
                    negative_border_items_sorted[item_index],
                    negative_border_items_sorted[item_index]
                    in non_freq_items_1))
                in_pos_border = False
                pos_border_itemset = frozenset()
                for itemset in maximal_itemsets:
                    if negative_border_items_sorted[item_index] in itemset:
                        in_pos_border = True
                        pos_border_itemset = itemset
                        break
                sys.stderr.write(
                    "{} in maximal_itemsets: {}. Itemset: {}\n".format(
                        negative_border_items_sorted[item_index],
                        in_pos_border, pos_border_itemset))
                in_neg_border = False
                neg_border_itemset = frozenset()
                for itemset in negative_border:
                    if negative_border_items_sorted[item_index] in itemset:
                        in_neg_border = True
                        neg_border_itemset = itemset
                        break
                sys.stderr.write(
                    "{} in negative_border: {}. Itemset: {}\n".format(
                        negative_border_items_sorted[item_index],
                        in_neg_border, neg_border_itemset))
                sys.exit(1)

        # Create capacity constraints and write it to script
        constr_str = "".join(
            (constr_start_str,
             ",".join("\"item{}\"".format(j)
                      for j in range(len(negative_border_items))), "], val=[",
             ",".join("1.0" for j in range(len(negative_border_items))), "])"))
        cplex_script.write(constr_str)
        last_tell = cplex_script.tell()
        cplex_script.write(",")
        cap_constr_name = "capacity"
        constr_names.append(cap_constr_name)
        sys.stderr.write("done\n")
        sys.stderr.flush()

        # Create chain constraints and write them to script
        sys.stderr.write("Writing chain constraints...")
        sys.stderr.flush()
        chains_index = 0
        for clique in nx.find_cliques(graph):
            if len(clique) == 1:
                continue
            constr_str = "".join(
                (constr_start_str, ",".join(
                    "\"set{}\"".format(j)
                    for j in map(lambda x: itemset_indexes_dict[x], clique)),
                 "], val=[1.0] * {}".format(len(clique)), ")"))
            cplex_script.write(constr_str)
            last_tell = cplex_script.tell()
            cplex_script.write(",")
            name = "chain{}".format(chains_index)
            constr_names.append(name)
            chains_index += 1
        sys.stderr.write("done\n")
        sys.stderr.flush()

        sys.stderr.write(" ".join(
            ("Optimization problem: capacity={}".format(capacity),
             "vars_num={}".format(vars_num),
             "negative_border_size={}".format(stats['negative_border']),
             "negative_border_items_num={}".format(len(negative_border_items)),
             "constr_num={}".format(constr_num),
             "chains_index={}\n".format(chains_index))))
        sys.stderr.flush()

        # Go back one character to remove last comma ","
        cplex_script.seek(last_tell)
        cplex_script.write("]\n")
        cplex_script.write("my_rownames = {}\n".format(constr_names))
        cplex_script.write("constr_num = {}\n".format(constr_num))
        cplex_script.write("chain_constr_num = {}\n".format(chains_index))
        cplex_script.write(" ".join(
            ("my_senses = [\"G\"] * constr_num +",
             "[\"L\"] + [\"L\"] * chain_constr_num\n")))
        cplex_script.write(" ".join(
            ("my_rhs = [0.0] * constr_num + [capacity] +",
             "[1.0] * chain_constr_num\n")))
        cplex_script.write("\n")
        cplex_script.write("try:\n")
        cplex_script.write("    prob = cplex.Cplex()\n")
        cplex_script.write("    prob.set_error_stream(sys.stderr)\n")
        cplex_script.write("    prob.set_log_stream(sys.stderr)\n")
        cplex_script.write("    prob.set_results_stream(sys.stderr)\n")
        cplex_script.write("    prob.set_warning_stream(sys.stderr)\n")
        # cplex_script.write("    prob.parameters.mip.strategy.file.set(2)\n")
        cplex_script.write(
            "    prob.parameters.mip.tolerances.mipgap.set({})\n".format(gap))
        cplex_script.write(
            "    prob.parameters.timelimit.set({})\n".format(600))
        # cplex_script.write("
        # prob.parameters.mip.strategy.variableselect.set(3) # strong
        # branching\n")
        cplex_script.write(
            "    prob.objective.set_sense(prob.objective.sense.maximize)\n")
        cplex_script.write(" ".join(
            ("    prob.variables.add(obj = my_obj, ub = my_ub,",
             "types = my_types, names = my_colnames)\n")))
        cplex_script.write(" ".join(
            ("    prob.linear_constraints.add(lin_expr = rows,",
             "senses = my_senses, rhs = my_rhs, names = my_rownames)\n")))
        cplex_script.write(" ".join(
            ("    prob.MIP_starts.add(cplex.SparsePair(",
             "ind = [i for i in range(vars_num)],", "val = [1.0] * vars_num),",
             "prob.MIP_starts.effort_level.auto)\n")))
        cplex_script.write("    prob.solve()\n")
        cplex_script.write("".join(
            ("    print (prob.solution.get_status(),",
             "prob.solution.status[prob.solution.get_status()],",
             "prob.solution.MIP.get_best_objective(),",
             "prob.solution.MIP.get_mip_relative_gap())\n")))
        cplex_script.write("except CplexError, exc:\n")
        cplex_script.write("    print exc\n")

    # Run script, solve optimization problem, extract solution
    my_environ = os.environ
    if "ILOG_LICENSE_FILE" not in my_environ:
        my_environ["ILOG_LICENSE_FILE"] = \
            "/local/projects/cplex/ilm/site.access.ilm"
    try:
        cplex_output_binary_str = subprocess.check_output(
            ["python2.6", tmpfile_name], env=my_environ, cwd=os.environ["PWD"])
    except subprocess.CalledProcessError as err:
        os.remove(tmpfile_name)
        utils.error_exit("CPLEX exited with error code {}: {}\n".format(
            err.returncode, err.output))
    # finally:
    #    os.remove(tmpfile_name)

    cplex_output = cplex_output_binary_str.decode(
        locale.getpreferredencoding())
    cplex_output_lines = cplex_output.split("\n")
    cplex_solution_line = cplex_output_lines[
        -1 if len(cplex_output_lines[-1]) > 0 else -2]
    try:
        cplex_solution = eval(cplex_solution_line)
    except Exception:
        utils.error_exit(
            "Error evaluating the CPLEX solution line: {}\n".format(
                cplex_solution_line))

    sys.stderr.write("cplex_solution={}\n".format(cplex_solution))
    sys.stderr.flush()
    # if cplex_solution[0] not in (1, 101, 102):
    #    utils.error_exit("CPLEX didn't find the optimal solution: {} {}
    #    {}\n".format(cplex_solution[0], cplex_solution[1], cplex_solution[2]))

    # This is also an upper bound to the size of the true negative border
    optimal_sol_upp_bound = int(
        math.floor(cplex_solution[2] * (1 + cplex_solution[3])))

    # Compute non-empirical VC-dimension and first candidate to epsilon_2
    stats['not_emp_vc_dim'] = int(math.floor(
        math.log2(optimal_sol_upp_bound))) + 1
    if stats['not_emp_vc_dim'] > math.log2(len(negative_border)):
        sys.stderr.write(
            "Lowering non_empirical VC-dimension to maximum value\n")
        stats['not_emp_vc_dim'] = int(
            math.floor(math.log2(len(negative_border))))
    not_emp_epsilon_2 = epsilon.get_eps_vc_dim(lower_delta, ds_stats['size'],
                                               stats['not_emp_vc_dim'])
    sys.stderr.write(" ".join(
        ("items_num-1={}".format(items_num - 1),
         "optimal_sol_upp_bound={}".format(optimal_sol_upp_bound),
         "not_emp_vc_dim={}".format(stats['not_emp_vc_dim']),
         "not_emp_e2={}\n".format(not_emp_epsilon_2))))
    sys.stderr.flush()

    # Loop to compute empirical VC-dimension using lengths distribution
    items_num_str_len = len(str(len(negative_border_items) - 1))
    longer_equal = 0
    for i in range(len(lengths)):
        cand_len = lengths[i]
        if cand_len == items_num:
            continue
        longer_equal += lengths_dict[cand_len]
        # No need to include tests to check whether cand_len is lower than
        # 2*ds_stats['maxlen'] if use_additional_knowledge is True: it is
        # always true given that cand_len <= ds_stats['maxlen']
        if cand_len >= len(negative_border_items):
            cand_len = len(negative_border_items) - 1

        # Modify the script to use the new capacity.
        with open(tmpfile_name, 'r+t') as cplex_script:
            cplex_script.seek(0)
            cplex_script.write("capacity = {}\n".format(
                str(cand_len).ljust(items_num_str_len)))
        # Run the script, solve optimization problem, extract solution
        my_environ = os.environ
        if "ILOG_LICENSE_FILE" not in my_environ:
            my_environ["ILOG_LICENSE_FILE"] = \
                "/local/projects/cplex/ilm/site.access.ilm"
        try:
            cplex_output_binary_str = subprocess.check_output(
                ["python2.6", tmpfile_name],
                env=my_environ,
                cwd=os.environ["PWD"])
        except subprocess.CalledProcessError as err:
            os.remove(tmpfile_name)
            utils.error_exit("CPLEX exited with error code {}: {}\n".format(
                err.returncode, err.output))
        # finally:
        #    os.remove(tmpfile_name)

        cplex_output = cplex_output_binary_str.decode(
            locale.getpreferredencoding())
        cplex_output_lines = cplex_output.split("\n")
        cplex_solution_line = cplex_output_lines[
            -1 if len(cplex_output_lines[-1]) > 0 else -2]
        try:
            cplex_solution = eval(cplex_solution_line)
        except Exception:
            utils.error_exit(
                "Error evaluating the CPLEX solution line: {}\n".format(
                    cplex_solution_line))

        sys.stderr.write("{}\n".format(cplex_solution))
        # if cplex_solution[0] not in (1, 101, 102):
        #   utils.error_exit("CPLEX didn't find the optimal solution: {} {}
        #   {}\n".format(cplex_solution[0], cplex_solution[1],
        #   cplex_solution[2]))

        # if cplex_solution[0] == 102:
        optimal_sol_upp_bound_emp = int(
            math.floor(cplex_solution[2] * (1 + cplex_solution[3])))
        # else:
        #    optimal_sol_upp_bound_emp = cplex_solution[0]

        stats['emp_vc_dim'] = int(
            math.floor(math.log2(optimal_sol_upp_bound_emp))) + 1
        if stats['emp_vc_dim'] > math.log2(len(negative_border)):
            sys.stderr.write("Lowering VC-dimension to maximum value\n")
            stats['emp_vc_dim'] = int(
                math.floor(math.log2(len(negative_border))))

        sys.stderr.write(" ".join(
            ("cand_len={}".format(cand_len),
             "longer_equal={}".format(longer_equal),
             "emp_vc_dim={}".format(stats['emp_vc_dim']),
             "optimal_sol_upp_bound_emp={}\n".format(optimal_sol_upp_bound_emp)
             )))
        sys.stderr.flush()

        # If stopping condition is satisfied, exit.
        if stats['emp_vc_dim'] <= longer_equal:
            break
    # sys.stderr.write("{} {} {}\n".format(vc_dim_cand, vc_dim_cand2,
    # vc_dim_cand3))
    os.remove(tmpfile_name)

    # Compute the bound to the shatter coefficient, which we use to compute
    # epsilon
    bound = min((math.log(optimal_sol_upp_bound), stats['emp_vc_dim'] *
                 math.log(math.e * ds_stats['size'] / stats['emp_vc_dim'])))
    sys.stderr.write(
        "bound to shatter coeff: log_of_range_size={}, log_using_vc_dim={}\n".
        format(
            math.log(optimal_sol_upp_bound), stats['emp_vc_dim'] *
            math.log(math.e * ds_stats['size'] / stats['emp_vc_dim'])))
    sys.stderr.flush()

    # The following assert is to check that we are better than another bound to
    # the shatter coefficient which used the number of closed itemsets in the
    # base set and the size of the negative border of the base set.
    # Intuitively, the assert should not fail. =)
    assert (optimal_sol_upp_bound <=
            original_negative_border_len + closed_itemsets_len)

    # Compute second candidate to epsilon_2
    emp_epsilon_2 = epsilon.get_eps_shattercoeff_bound(lower_delta,
                                                       ds_stats['size'], bound,
                                                       max_freq_base_set)
    sys.stderr.write(
        "cand_len={} opt_sol_upp_bound_emp={} emp_vc_dim={} bound={} max_freq_base_set={} emp_e2={}\n"
        .format(cand_len, optimal_sol_upp_bound_emp, stats['emp_vc_dim'],
                bound, max_freq_base_set, emp_epsilon_2))
    sys.stderr.flush()

    sys.stderr.write("not_emp_e2={}, emp_e2={}\n".format(
        not_emp_epsilon_2, emp_epsilon_2))
    sys.stderr.flush()
    stats['epsilon_2'] = min(emp_epsilon_2, not_emp_epsilon_2)

    # Extract TFIs using epsilon_2
    sys.stderr.write("Extracting TFIs using epsilon_2...")
    sys.stderr.flush()
    trueFIs = dict()
    for itemset in reversed(freq_itemsets_1_sorted):
        if freq_itemsets_1_dict[itemset] >= min_freq + stats['epsilon_2']:
            trueFIs[itemset] = freq_itemsets_1_dict[itemset]
        else:
            break
    sys.stderr.write("done ({} TFIS)\n".format(len(trueFIs)))
    sys.stderr.flush()

    return (trueFIs, stats)
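
A hypothetical call, assuming ds_stats comes from the dataset-statistics
helper and the results file follows the format parsed above (first line
carries the dataset size in parentheses):

# trueFIs, stats = get_trueFIs(ds_stats, 'dataset.res', min_freq=0.05,
#                              delta=0.1)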
Example #27
    def generate_video(self, audio, image):
        '''
        Encodes a video file from our audio and image input files.
        '''
        # Check to see if our files exist at all.
        if not (os.path.exists(audio) and os.path.exists(image)):
            error_exit('please specify a valid audio and image file')

        in_image_ext = os.path.splitext(image)[1]
        in_audio_ext = os.path.splitext(audio)[1]

        # Check our MP3/OGG/FLAC/etc file and get its duration.
        probe_cmd = [self.settings['path_ffprobe'], audio]
        try:
            probe_out = subprocess.check_output(
                probe_cmd,
                stderr=subprocess.STDOUT
            )
            if self.settings['verbose']:
                print(probe_out)
        except (OSError, subprocess.CalledProcessError):
            error_exit('''couldn't probe the audio file \
(ffprobe might not be available)''')

        # Try to extract some metadata from the file using Mutagen.
        try:
            metadata = mutagen.File(audio)
        except AttributeError:
            metadata = []

        # Save a human-readable version of the metadata in the object.
        # Keep the original Mutagen output around too.
        self.settings['metadata'] = {}
        self.settings['orig_metadata'] = metadata
        if metadata is not None:
            for tag in metadata:
                item = metadata[tag]
                # We join the item in case it's still a list, as in the case
                # of Vorbis.
                if isinstance(item, (list, tuple)):
                    item = ''.join(item)
                self.settings['metadata'][self.tunetags.tag_lookup(tag)] = \
                    str(item)

        # Lift the actual track duration string out of the output.
        duration = re.findall('Duration: (.+?),', probe_out)

        # If we get valid output, parse the duration and get a seconds value.
        # Otherwise, stop the script.
        if len(duration):
            duration = duration[0]
        else:
            error_exit('''couldn't parse ffprobe's output. Try again with \
-v (--verbose) to see what went wrong.''')

        # Turn the string into a datetime format.
        try:
            audio_info = datetime.strptime(duration, '%H:%M:%S.%f')
            delta = timedelta(
                hours=audio_info.hour,
                minutes=audio_info.minute,
                seconds=audio_info.second,
                microseconds=audio_info.microsecond
            )
        except ValueError:
            error_exit('''encountered an error trying to determine the \
duration of the audio file. It could be in an unrecognized format, or \
longer than 24 hours. (Duration: %s, exception: %s)''' % (
                duration, sys.exc_info()[0]
            ))

        print('Using image file `%s\', size: %s.' % (
            image,
            os.path.getsize(image)
        ))
        print('Using audio file `%s\', size: %s, duration: %s.' % (
            audio,
            os.path.getsize(audio),
            duration
        ))

        if not self.settings['metadata']:
            print("Couldn't extract audio file tags. Continuing.")
        else:
            print('Extracted %d tag(s) from the audio file.' % len(
                self.settings['metadata']
            ))

        print('Encoding video file...')

        # Now call ffmpeg and produce the video.
        ffmpeg_cmd = [
            self.settings['path_ffmpeg'],
            # loop the video (picture) for the movie's duration
            '-loop', '1',
            # a framerate of 1fps (anything lower won't be accepted by Youtube)
            '-framerate', '1:1',
            # one input file is the picture
            '-i', image,
            # automatically overwrite on duplicate
            '-y',
        ]
        # Add the audio file.
        if in_audio_ext == '.flac':
            # mp4 doesn't take flac very well, so we'll convert it.
            ffmpeg_cmd.extend([
                # one input file is the audio
                '-i', audio,
                # for compatibility with various builds, we'll use MP3
                '-c:a', 'libmp3lame',
                # high quality CBR is good enough
                '-b:a', '320k',
            ])
        else:
            ffmpeg_cmd.extend([
                # one input file is the audio
                '-i', audio,
                # only copy the audio, don't re-encode it
                '-c:a', 'copy',
            ])
        # Add the video encoding options.
        ffmpeg_cmd.extend([
            # use x264 as the video encoder
            '-c:v', 'libx264',
            # duration of the video
            '-t', str(delta.total_seconds()),
            # 4:4:4 chroma subsampling (best quality)
            '-pix_fmt', 'yuv444p',
            # as fast as possible, at cost of filesize
            # (uploading likely costs less time)
            '-preset', 'ultrafast',
            # lossless quality
            '-qp', '0',
            # output
            self.settings['path_output']
        ])

        try:
            probe_out = subprocess.check_output(
                ffmpeg_cmd,
                stderr=subprocess.STDOUT
            )
            if self.settings['verbose']:
                print(probe_out)
        except (OSError, subprocess.CalledProcessError):
            error_exit('''encountered an error trying to generate the video. \
Try again with -v (--verbose) to see what went wrong. \
(Exception: %s)''' % sys.exc_info()[0])

        print('Successfully generated the file `%s\'.'
              % self.settings['path_output'])
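
A minimal standalone sketch of the duration-parsing step above, using a fabricated ffprobe output line (real output comes from subprocess.check_output):

import re
from datetime import datetime, timedelta

# Fabricated sample of the line ffprobe prints on stderr.
probe_out = 'Duration: 00:03:41.93, start: 0.000000, bitrate: 320 kb/s'

# Lift the duration string out, then parse it as a time-of-day value.
duration = re.findall('Duration: (.+?),', probe_out)[0]
parsed = datetime.strptime(duration, '%H:%M:%S.%f')
delta = timedelta(hours=parsed.hour, minutes=parsed.minute,
                  seconds=parsed.second, microseconds=parsed.microsecond)
print(delta.total_seconds())  # 221.93
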
Example #28
def get_trueFIs(exp_res_filename,
                eval_res_filename,
                min_freq,
                delta,
                gap=0.0,
                first_epsilon=1.0,
                vcdim=-1):
    """ Compute the True Frequent Itemsets using the 'holdout-VC' method.

    TODO Add more details."""

    stats = dict()

    with open(exp_res_filename) as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            utils.error_exit(" ".join(
                ("Cannot compute size of the explore dataset:",
                 "'{}' is not in the recognized format\n".format(size_line))))
        try:
            stats['exp_size'] = int(size_str)
        except ValueError:
            utils.error_exit(" ".join(
                ("Cannot compute size of the explore dataset:",
                 "{} is not a number\n".format(size_str))))

    with open(eval_res_filename) as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            utils.error_exit(" ".join(
                ("Cannot compute size of the eval dataset:",
                 "'{}' is not in the recognized format\n".format(size_line))))
        try:
            stats['eval_size'] = int(size_str)
        except ValueError:
            utils.error_exit(" ".join(
                ("Cannot compute size of the eval dataset:",
                 "'{}' is not a number\n".format(size_str))))

    stats['orig_size'] = stats['exp_size'] + stats['eval_size']

    exp_res = utils.create_results(exp_res_filename, min_freq)
    stats['exp_res'] = len(exp_res)
    exp_res_set = set(exp_res.keys())
    eval_res = utils.create_results(eval_res_filename, min_freq)
    stats['eval_res'] = len(eval_res)
    eval_res_set = set(eval_res.keys())
    intersection = exp_res_set & eval_res_set
    stats['holdout_intersection'] = len(intersection)
    stats['holdout_false_negatives'] = len(exp_res_set - eval_res_set)
    stats['holdout_false_positives'] = len(eval_res_set - exp_res_set)
    stats['holdout_jaccard'] = len(intersection) / \
        len(exp_res_set | eval_res_set)

    # One may want to play with giving different values for the different error
    # probabilities, but there isn't really much point in it.
    lower_delta = 1.0 - math.sqrt(1 - delta)

    stats['epsilon_1'] = first_epsilon

    sys.stderr.write("Computing candidates...")
    sys.stderr.flush()
    freq_bound = min_freq + stats['epsilon_1']
    candidates = []
    candidates_items = set()
    trueFIs = dict()
    for itemset in exp_res:
        if exp_res[itemset] < freq_bound:
            candidates.append(itemset)
            candidates_items |= itemset
        else:
            # Add itemsets with frequency at least freq_bound to the TFIs
            trueFIs[itemset] = exp_res[itemset]
    sys.stderr.write("done: {} candidates ({} items)\n".format(
        len(candidates), len(candidates_items)))
    sys.stderr.flush()

    if (len(candidates) > 0 and vcdim > -1
            and len(candidates_items) - 1 > vcdim):
        sys.stderr.write("Using additional knowledge\n")
        candidates_items_sorted = sorted(candidates_items)
        candidates_items_in_sets_dict = dict()
        candidates_itemset_index = 0
        itemset_indexes_dict = dict()
        for first_itemset_index in range(len(candidates)):
            first_itemset = candidates[first_itemset_index]
            for item in first_itemset:
                if item in candidates_items_in_sets_dict:
                    candidates_items_in_sets_dict[item].append(
                        candidates_itemset_index)
                else:
                    candidates_items_in_sets_dict[item] = \
                        [candidates_itemset_index, ]
            itemset_indexes_dict[first_itemset] = candidates_itemset_index
            candidates_itemset_index += 1

        # Compute an upper-bound to the VC-dimension of the set of candidates.
        constr_start_str = "cplex.SparsePair(ind = ["
        constr_end_str = "], val = vals)"
        vars_num = len(candidates) + len(candidates_items)
        constr_names = []

        capacity = vcdim

        (tmpfile_handle,
         tmpfile_name) = tempfile.mkstemp(prefix="cplx",
                                          dir=os.environ['PWD'],
                                          text=True)
        os.close(tmpfile_handle)
        with open(tmpfile_name, 'wt') as cplex_script:
            cplex_script.write("capacity = {}\n".format(capacity))
            cplex_script.write("import cplex, os, sys\n")
            cplex_script.write("from cplex.exceptions import CplexError\n")
            cplex_script.write("\n")
            cplex_script.write("\n")
            cplex_script.write(" ".join(
                ("os.environ[\"ILOG_LICENSE_FILE\"] ="
                 "\"/local/projects/cplex/ilm/site.access.ilm\"\n")))
            cplex_script.write("vals = [-1.0, 1.0]\n")
            cplex_script.write("sets_num = {}\n".format(len(candidates)))
            cplex_script.write("items_num = {}\n".format(
                len(candidates_items)))
            cplex_script.write("vars_num = {}\n".format(vars_num))
            cplex_script.write("my_ub = [1.0] * vars_num\n")
            cplex_script.write(
                "my_types = \"\".join(\"I\" for i in range(vars_num))\n")
            cplex_script.write(
                "my_obj = ([1.0] * sets_num) + ([0.0] * items_num)\n")
            cplex_script.write(" ".join(
                ("my_colnames ="
                 "[\"set{0}\".format(i) for i in range(sets_num)]",
                 "+ [\"item{0}\".format(j) for j in range(items_num)]\n")))
            cplex_script.write("rows = [ ")

            sys.stderr.write("Writing knapsack constraints...")
            sys.stderr.flush()
            constr_num = 0
            for item_index in range(len(candidates_items)):
                try:
                    for itemset_index in \
                            candidates_items_in_sets_dict[
                                candidates_items_sorted[item_index]]:
                        constr_str = "".join(
                            (constr_start_str, "\"set{}\",\"item{}\"".format(
                                itemset_index, item_index), constr_end_str))
                        cplex_script.write("{},".format(constr_str))
                        constr_num += 1
                        name = "s{}i{}".format(item_index, itemset_index)
                        constr_names.append(name)
                except KeyError:
                    sys.stderr.write(" ".join(
                        ("item_index={}".format(item_index),
                         "candidates_items_sorted[item_index]={}\n".format(
                             candidates_items_sorted[item_index]))))
                    in_candidates = False
                    candidates_itemset = frozenset()
                    for itemset in candidates:
                        if candidates_items_sorted[item_index] in itemset:
                            in_candidates = True
                            candidates_itemset = itemset
                            break
                    sys.stderr.write(
                        "{} in negative_border: {}. Itemset: {}\n".format(
                            candidates_items_sorted[item_index], in_candidates,
                            candidates_itemset))
                    sys.exit(1)

            # Create the capacity constraint and write it to the script
            constr_str = "".join(
                (constr_start_str,
                 ",".join("\"item{}\"".format(j)
                          for j in range(len(candidates_items))), "], val=[",
                 ",".join("1.0" for j in range(len(candidates_items))), "])"))
            cplex_script.write(constr_str)
            cplex_script.write("]\n")
            cap_constr_name = "capacity"
            constr_names.append(cap_constr_name)
            sys.stderr.write("done\n")
            sys.stderr.flush()

            sys.stderr.write(" ".join(
                ("Optimization problem: capacity={}".format(capacity),
                 "vars_num={}".format(vars_num),
                 "candidates={}".format(len(candidates)),
                 "candidates_items_num={}".format(len(candidates_items)),
                 "constr_num={}\n".format(constr_num))))
            sys.stderr.flush()

            cplex_script.write("my_rownames = {}\n".format(constr_names))
            cplex_script.write("constr_num = {}\n".format(constr_num))
            cplex_script.write("my_senses = [\"G\"] * constr_num + [\"L\"]\n")
            cplex_script.write("my_rhs = [0.0] * constr_num + [capacity]\n")
            cplex_script.write("\n")
            cplex_script.write("try:\n")
            cplex_script.write("    prob = cplex.Cplex()\n")
            cplex_script.write("    prob.set_error_stream(sys.stderr)\n")
            cplex_script.write("    prob.set_log_stream(sys.stderr)\n")
            cplex_script.write("    prob.set_results_stream(sys.stderr)\n")
            cplex_script.write("    prob.set_warning_stream(sys.stderr)\n")
            # cplex_script.write("
            # prob.parameters.mip.strategy.file.set(2)\n")
            cplex_script.write(
                "    prob.parameters.mip.tolerances.mipgap.set({})\n".format(
                    gap))
            cplex_script.write(
                "    prob.parameters.timelimit.set({})\n".format(600))
            # cplex_script.write("
            # prob.parameters.mip.strategy.variableselect.set(3) # strong
            # branching\n")
            cplex_script.write("".join(("    prob.objective.set_sense(",
                                        "prob.objective.sense.maximize)\n")))
            cplex_script.write(" ".join(
                ("    prob.variables.add(obj = my_obj, ub = my_ub,",
                 "types = my_types, names = my_colnames)\n")))
            cplex_script.write(" ".join(
                ("    prob.linear_constraints.add(lin_expr = rows,",
                 "senses = my_senses, rhs = my_rhs,",
                 "names = my_rownames)\n")))
            cplex_script.write(" ".join(
                ("    prob.MIP_starts.add(cplex.SparsePair(ind =",
                 "[i for i in range(vars_num)], val = [1.0] * vars_num),",
                 "prob.MIP_starts.effort_level.auto)\n")))
            cplex_script.write("    prob.solve()\n")
            cplex_script.write(",".join(
                ("    print (prob.solution.get_status()",
                 "prob.solution.status[prob.solution.get_status()]",
                 "prob.solution.MIP.get_best_objective()"
                 "prob.solution.MIP.get_mip_relative_gap())\n")))
            cplex_script.write("except CplexError, exc:\n")
            cplex_script.write("    print exc\n")

        # Run script, solve optimization problem, extract solution
        my_environ = os.environ.copy()
        if "ILOG_LICENSE_FILE" not in my_environ:
            my_environ["ILOG_LICENSE_FILE"] = \
                "/local/projects/cplex/ilm/site.access.ilm"
        try:
            cplex_output_binary_str = subprocess.check_output(
                ["python2.6", tmpfile_name],
                env=my_environ,
                cwd=os.environ["PWD"])
        except subprocess.CalledProcessError as err:
            os.remove(tmpfile_name)
            utils.error_exit("CPLEX exited with error code {}: {}\n".format(
                err.returncode, err.output))
        # finally:
        #    os.remove(tmpfile_name)

        cplex_output = cplex_output_binary_str.decode(
            locale.getpreferredencoding())
        cplex_output_lines = cplex_output.split("\n")
        cplex_solution_line = cplex_output_lines[
            -1 if len(cplex_output_lines[-1]) > 0 else -2]
        try:
            cplex_solution = eval(cplex_solution_line)
        except Exception:
            utils.error_exit(
                "Error evaluating the CPLEX solution line: {}\n".format(
                    cplex_solution_line))

        sys.stderr.write("cplex_solution={}\n".format(cplex_solution))
        sys.stderr.flush()
        # if cplex_solution[0] not in (1, 101, 102):
        #    utils.error_exit("CPLEX didn't find the optimal solution: {} {}
        #    {}\n".format(cplex_solution[0], cplex_solution[1],
        #    cplex_solution[2]))

        optimal_sol_upp_bound = int(
            math.floor(cplex_solution[2] * (1 + cplex_solution[3])))
        stats['vcdim'] = int(math.floor(math.log2(optimal_sol_upp_bound))) + 1
        if stats['vcdim'] > math.log2(len(candidates)):
            sys.stderr.write("Lowering VC-dimension to maximum value\n")
            sys.stderr.flush()
            stats['vcdim'] = int(math.floor(math.log2(len(candidates))))
        stats['epsilon_2_vc'] = epsilon.get_eps_vc_dim(lower_delta,
                                                       stats['orig_size'],
                                                       stats['vcdim'])
    elif (len(candidates) > 0 and vcdim > -1
          and len(candidates_items) - 1 <= vcdim):
        sys.stderr.write("Additional knowledge is useless\n")
        sys.stderr.flush()
        stats['vcdim'] = int(math.floor(math.log2(len(candidates))))
        stats['epsilon_2_vc'] = epsilon.get_eps_vc_dim(lower_delta,
                                                       stats['orig_size'],
                                                       stats['vcdim'])
    elif len(candidates) > 0 and vcdim == -1:
        sys.stderr.write("Not using additional knowledge\n")
        sys.stderr.flush()
        stats['vcdim'] = int(math.floor(math.log2(len(candidates))))
        stats['epsilon_2_vc'] = epsilon.get_eps_vc_dim(lower_delta,
                                                       stats['orig_size'],
                                                       stats['vcdim'])
    else:
        sys.stderr.write("There are no candidates\n")
        sys.stderr.flush()
        stats['vcdim'] = 0
        stats['epsilon_2_vc'] = 0

    # Loop to compute empirical VC-dimension using lengths distribution
    capacity_str_len = len(str(capacity))
    longer_equal = 0
    lengths_dict = ds_stats['lengths']
    lengths = sorted(lengths_dict.keys(), reverse=True)
    start_len_idx = 0
    while start_len_idx < len(lengths):
        if lengths[start_len_idx] > len(candidates_items) - 1:
            longer_equal += lengths_dict[lengths[start_len_idx]]
            start_len_idx += 1
        else:
            break
    for i in range(start_len_idx, len(lengths)):
        cand_len = lengths[i]
        longer_equal += lengths_dict[cand_len]
        # Modify the script to use the new capacity.
        with open(tmpfile_name, 'r+t') as cplex_script:
            cplex_script.seek(0)
            cplex_script.write("capacity = {}\n".format(
                str(cand_len).ljust(capacity_str_len)))
        # Run the script, solve optimization problem, extract solution
        my_environ = os.environ.copy()
        if "ILOG_LICENSE_FILE" not in my_environ:
            my_environ["ILOG_LICENSE_FILE"] = \
                "/local/projects/cplex/ilm/site.access.ilm"
        try:
            cplex_output_binary_str = subprocess.check_output(
                ["python2.6", tmpfile_name],
                env=my_environ,
                cwd=os.environ["PWD"])
        except subprocess.CalledProcessError as err:
            os.remove(tmpfile_name)
            utils.error_exit("CPLEX exited with error code {}: {}\n".format(
                err.returncode, err.output))
        # finally:
        #    os.remove(tmpfile_name)

        cplex_output = cplex_output_binary_str.decode(
            locale.getpreferredencoding())
        cplex_output_lines = cplex_output.split("\n")
        cplex_solution_line = cplex_output_lines[
            -1 if len(cplex_output_lines[-1]) > 0 else -2]
        try:
            cplex_solution = eval(cplex_solution_line)
        except Exception:
            utils.error_exit(
                "Error evaluating the CPLEX solution line: {}\n".format(
                    cplex_solution_line))

        sys.stderr.write("{}\n".format(cplex_solution))
        # if cplex_solution[0] not in (1, 101, 102):
        #   utils.error_exit("CPLEX didn't find the optimal solution: {} {}
        #   {}\n".format(cplex_solution[0], cplex_solution[1],
        #   cplex_solution[2]))

        # if cplex_solution[0] == 102:
        optimal_sol_upp_bound_emp = int(
            math.floor(cplex_solution[2] * (1 + cplex_solution[3])))
        # else:
        #    optimal_sol_upp_bound_emp = cplex_solution[0]

        stats['emp_vc_dim'] = int(
            math.floor(math.log2(optimal_sol_upp_bound_emp))) + 1
        if stats['emp_vc_dim'] > math.log2(len(candidates)):
            sys.stderr.write("Lowering VC-dimension to maximum value\n")
            stats['emp_vc_dim'] = int(
                math.floor(math.log2(len(candidates))))

        sys.stderr.write(" ".join(
            ("cand_len={}".format(cand_len),
             "longer_equal={}".format(longer_equal),
             "emp_vc_dim={}".format(stats['emp_vc_dim']),
             "optimal_sol_upp_bound_emp={}\n".format(optimal_sol_upp_bound_emp)
             )))
        sys.stderr.flush()

        # If stopping condition is satisfied, exit.
        if stats['emp_vc_dim'] <= longer_equal:
            break
    os.remove(tmpfile_name)

    # Compute the bound to the shatter coefficient, which we use to compute
    # epsilon
    bound = min((math.log(len(candidates)), stats['emp_vc_dim'] *
                 math.log(math.e * stats['eval_size'] / stats['emp_vc_dim'])))

    # Compute second candidate to epsilon_2
    emp_epsilon_2 = epsilon.get_eps_shattercoeff_bound(lower_delta,
                                                       stats['eval_size'],
                                                       bound,
                                                       max_freq_base_set)
    sys.stderr.write(
        "cand_len={} opt_sol_upp_bound_emp={} emp_vc_dim={} bound={} max_freq_base_set={} emp_e2={}\n"
        .format(cand_len, optimal_sol_upp_bound_emp, stats['emp_vc_dim'],
                bound, max_freq_base_set, emp_epsilon_2))
    sys.stderr.flush()

    sys.stderr.write("not_emp_e2={}, emp_e2={}\n".format(
        stats['epsilon_2_vc'], emp_epsilon_2))
    sys.stderr.flush()
    stats['epsilon_2'] = min(emp_epsilon_2, stats['epsilon_2_vc'])

    if len(candidates) > 0:
        sys.stderr.write("Computing the candidates that are TFIs...")
        sys.stderr.flush()
        freq_bound = min_freq + stats['epsilon_2']
        eval_res_itemsets = frozenset(eval_res.keys())
        for itemset in sorted(frozenset(candidates) & eval_res_itemsets,
                              key=lambda x: eval_res[x],
                              reverse=True):
            if eval_res[itemset] >= freq_bound:
                trueFIs[itemset] = eval_res[itemset]
        sys.stderr.write("done\n")
        sys.stderr.flush()

    return (trueFIs, stats)
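
A toy illustration of the VC-dimension bound used above, with invented numbers: the CPLEX problem yields an upper bound on the number of candidate itemsets a single transaction can support, and the (empirical) VC-dimension is then at most floor(log2(bound)) + 1, capped at log2 of the number of candidates.

import math

optimal_sol_upp_bound = 12  # hypothetical objective bound from CPLEX
num_candidates = 50

vcdim = int(math.floor(math.log2(optimal_sol_upp_bound))) + 1  # 4
# Never exceed log2 of the number of candidate itemsets.
vcdim = min(vcdim, int(math.floor(math.log2(num_candidates))))  # 4
print(vcdim)
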
Example #29
class Tune2Tube(object):
    def __init__(self):
        self.settings = {
            # ffmpeg is a dependency for this script. ffprobe should be
            # installed along with ffmpeg.
            'path_ffmpeg': 'ffmpeg',
            'path_ffprobe': 'ffprobe',
            # Temporary output filename.
            'path_output': 'tmp.mp4',
            # Version number.
            't2t_version': '0.1',
            # Whether to display ffmpeg/ffprobe output.
            'verbose': False,
            # Whether to only generate the video file without uploading it.
            'generate_only': False,
            # Whether to forego the usage of stored oauth2 tokens.
            # If set to True, you will need to authenticate using your
            # browser each time you use the script.
            'no_stored_auth': False,
            # Default title to use in case the user's own title is
            # an empty string.
            'default_title': '(Empty title)',
            # Default variables to use for the dynamically generated title.
            'default_title_vars': 'artist,title',
            # Whether to use the dynamically generated title
            # from the file's metadata.
            'dynamic_title': True,
            'title': None,
            'title_vars': None
        }

        # Explicitly tell the underlying HTTP transport library not to retry,
        # since we are handling retry logic ourselves.
        httplib2.RETRIES = 1

        # Maximum number of times to retry before giving up.
        self.max_retries = 10

        # Always retry when these exceptions are raised.
        self.retriable_exceptions = (httplib2.HttpLib2Error, IOError,
                                     httplib.NotConnected,
                                     httplib.IncompleteRead,
                                     httplib.ImproperConnectionState,
                                     httplib.CannotSendRequest,
                                     httplib.CannotSendHeader,
                                     httplib.ResponseNotReady,
                                     httplib.BadStatusLine)

        # Always retry when an apiclient.errors.HttpError with one of these
        # status codes is raised.
        self.retriable_status_codes = [500, 502, 503, 504]

        # This OAuth 2.0 access scope allows an application to upload files to
        # the authenticated user's YouTube channel, but doesn't allow other
        # types of access.
        self.youtube_base = 'https://www.googleapis.com'
        self.youtube_upload_scope = self.youtube_base + '/auth/youtube.upload'
        self.youtube_api_service_name = 'youtube'
        self.youtube_api_version = 'v3'

        # We can set our uploaded video to one of these statuses.
        self.valid_privacy_statuses = ('public', 'private', 'unlisted')

        # This variable defines a message to display if
        # the client_secrets_file is missing.
        self.missing_client_secrets_message = '''
%s: Error: Please configure OAuth 2.0.

To make this script run you will need to populate the client_secrets.json file
found at:

   %s

with information from the Developers Console, which can be accessed
through <https://console.developers.google.com/>. See the README.md file
for more details.
'''

        # Set up our command line argument parser.
        # The argparser is initialized in oauth2client/tools.py. We're just
        # adding our own arguments to the ones already defined there.
        argparser.description = '''Generates a video from an image and audio \
file and uploads it to Youtube.'''
        argparser.epilog = '''A Youtube Data API client key is required to \
use this script, as well as ffmpeg. For help on setting up these \
dependencies, see this project\'s Github page \
<http://github.com/msikma/tune2tube/> or the included README.md file.'''
        argparser.add_help = True
        # Manually add a help argument,
        # as it is turned off in oauth2client/tools.py.
        argparser.add_argument('--no_stored_auth',
                               action='store_true',
                               help='Forego using stored oauth2 tokens.')
        argparser.add_argument('audio_file',
                               help='Audio file (MP3, OGG, FLAC, etc).')
        argparser.add_argument('image_file',
                               help='Image file (PNG, JPG, etc).')
        argparser.add_argument(
            '--output',
            help='''Save the output video (.MP4) to a file rather than \
uploading it to Youtube.''')
        argparser.add_argument('--cs_json',
                               help='''Path to the client secrets json file \
(default: client_secrets.json).''',
                               default='client_secrets.json')
        argparser.add_argument(
            '--privacy',
            choices=self.valid_privacy_statuses,
            help='Privacy status of the video (default: unlisted).',
            default='unlisted')
        argparser.add_argument(
            '--category',
            default='10',
            help='''Numeric video category (see the Github wiki for a list; \
the default is 10, Music).''')
        argparser.add_argument(
            '--keywords',
            help='Comma-separated list of video keywords/tags.',
            default='')
        mxgroup = argparser.add_mutually_exclusive_group()
        mxgroup.add_argument(
            '--title',
            help='''Video title string (default: \'%s\'). If neither --title \
nor --title_vars is specified, --title_vars will be used with its default \
value, unless this would result in \
an empty title.''' % self.settings['default_title'])
        mxgroup.add_argument(
            '--title_vars',
            nargs='?',
            help='''Comma-separated list of metadata variables to use as \
the video title (default: %s).''' % self.settings['default_title_vars'])
        argparser.add_argument(
            '--title_sep',
            help='''Separator for the title variables (default: \' - \', \
yielding e.g. \'Artist - Title\'). Ignored if \
using --title.''',
            default=' - ')
        argparser.add_argument(
            '--description',
            nargs='?',
            help='Video description string (default: empty string).',
            default='')
        argparser.add_argument(
            '--add_metadata',
            action='store_true',
            help='''Adds a list of audio file metadata to the \
description (default: True).''',
            default=True)
        argparser.add_argument('-V',
                               '--version',
                               action='version',
                               version='%(prog)s ' +
                               self.settings['t2t_version'],
                               help='Show version number and exit.')
        mxgroup = argparser.add_mutually_exclusive_group()
        mxgroup.add_argument(
            '-v',
            '--verbose',
            action='store_true',
            help='Verbose mode (display ffmpeg/ffprobe output).')
        mxgroup.add_argument('-q',
                             '--quiet',
                             action='store_true',
                             help='Quiet mode.')
        argparser.add_argument('-h',
                               '--help',
                               action='help',
                               default=argparse.SUPPRESS,
                               help='Show this help message and exit.')

        self.tunetags = TuneTags()

    def get_authenticated_service(self, args):
        '''
        Get authenticated and cache the result.
        '''
        flow = flow_from_clientsecrets(
            self.settings['client_secrets_file'],
            scope=self.youtube_upload_scope,
            message=self.missing_client_secrets_message %
            ('tune2tube.py',
             os.path.abspath(
                 os.path.join(os.path.dirname(__file__),
                              self.settings['client_secrets_file']))))

        storage = Storage('%s-oauth2.json' % 'tune2tube.py')
        credentials = storage.get()
        if credentials is None or credentials.invalid \
           or self.settings['no_stored_auth']:
            credentials = run_flow(flow, storage, args)

        return build(self.youtube_api_service_name,
                     self.youtube_api_version,
                     http=credentials.authorize(httplib2.Http()))

    def initialize_upload(self, youtube, args, upfile):
        '''
        Begin a resumable video upload.
        '''
        tags = None

        if self.settings['keywords']:
            tags = self.settings['keywords'].split(',')

        # If we need to generate a dynamic title, do so now.
        if self.settings['dynamic_title']:
            title_vars = self.settings['title_vars'].split(',')
            items = [
                self.settings['metadata'][n] for n in title_vars
                if n in self.settings['metadata']
            ]
            title = self.settings['title_sep'].join(items)
        else:
            title = self.settings['title']

        if title == '':
            title = '(no title)'

        # Add the metadata tags to the description if needed.
        description = self.settings['description'].strip()
        if self.settings['add_metadata']:
            if description != '':
                description += '\n'
            # Sort the list of metadata, so that items with linebreaks go last.
            metalist = [{
                key: self.settings['metadata'][key]
            } for key in self.settings['metadata']]
            metalist = sorted(metalist,
                              key=lambda x: '\n' in list(x.values())[0])
            for tag in metalist:
                for key in tag:
                    if "APIC" in key:
                        continue
                    value = tag[key]
                    nice_key = self.tunetags.tag_lookup(key, True)
                    if '\n' in value:
                        description += '\n----\n%s: %s\n' % (nice_key, value)
                    else:
                        description += '\n%s: %s' % (nice_key, value)

        body = {
            'snippet': {
                'title': title,
                'description': description,
                'tags': tags,
                'categoryId': self.settings['category']
            },
            'status': {
                'privacyStatus': self.settings['privacy']
            }
        }

        # Call the API's videos.insert method to create and upload the video.
        insert_request = youtube.videos().insert(part=','.join(body.keys()),
                                                 body=body,
                                                 media_body=MediaFileUpload(
                                                     upfile,
                                                     chunksize=-1,
                                                     resumable=True))

        filesize = os.path.getsize(upfile)
        print('Uploading file... (filesize: %s)' % bytes_to_human(filesize))
        self.resumable_upload(insert_request)

    def resumable_upload(self, insert_request):
        '''
        This method implements an exponential backoff strategy to resume a
        failed upload.
        '''
        response = None
        error = None
        retry = 0
        while response is None:
            try:
                status, response = insert_request.next_chunk()
                if 'id' in response:
                    print('''Video ID `%s' was successfully uploaded. \
Its visibility is set to `%s'.''' % (response['id'], self.settings['privacy']))
                    print('''URL of the newly uploaded video: \
<https://www.youtube.com/watch?v=%s>''' % response['id'])
                    print('''It may take some time for the video to \
finish processing; typically 1-10 minutes.''')
                else:
                    error_exit('''The upload failed with an unexpected \
response: %s''' % response)
            except HttpError, e:
                if e.resp.status in self.retriable_status_codes:
                    error = '''A retriable HTTP error %d occurred:\n%s''' % (
                        e.resp.status, e.content)
                else:
                    raise
            except self.retriable_exceptions, e:
                error = 'A retriable error occurred: %s' % e

            if error is not None:
                print(error)
                retry += 1
                if retry > self.max_retries:
                    error_exit('''Too many upload errors. No longer \
attempting to retry.''')
                max_sleep = 2**retry
                sleep_seconds = random.random() * max_sleep
                print('''Sleeping %f seconds and then \
retrying...''' % sleep_seconds)
                time.sleep(sleep_seconds)
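
The retry loop above is a truncated exponential backoff with full jitter. The same pattern in isolation, as a sketch (flaky_call is a hypothetical stand-in for insert_request.next_chunk()):

import random
import time

def call_with_backoff(flaky_call, max_retries=10):
    # Retry flaky_call() with exponential backoff and full jitter: the
    # sleep is uniform in [0, 2**retry), so the expected wait doubles
    # after each consecutive failure.
    for retry in range(1, max_retries + 1):
        try:
            return flaky_call()
        except IOError as e:
            print('A retriable error occurred: %s' % e)
            time.sleep(random.random() * 2 ** retry)
    error_exit('Too many upload errors. No longer attempting to retry.')
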
def main():
    # Verify arguments
    if len(sys.argv) != 7:
        utils.error_exit(
            "Usage: {} first_epsilon delta min_freq pvalue_mode exploreres evalres\n"
            .format(os.path.basename(sys.argv[0])))
    exp_res_filename = sys.argv[5]
    if not os.path.isfile(exp_res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(exp_res_filename))
    eval_res_filename = sys.argv[6]
    if not os.path.isfile(eval_res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(eval_res_filename))
    pvalue_mode = sys.argv[4].upper()
    if pvalue_mode != "C" and pvalue_mode != "E" and pvalue_mode != "W":
        utils.error_exit(
            "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format(
                pvalue_mode))
    try:
        first_epsilon = float(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))

    (trueFIs, stats) = get_trueFIs(exp_res_filename, eval_res_filename,
                                   min_freq, delta, pvalue_mode, first_epsilon)

    utils.print_itemsets(trueFIs, stats['orig_size'])

    sys.stderr.write(
        "exp_res_file={},eval_res_file={},pvalue_mode={},d={},min_freq={},trueFIs={}\n"
        .format(os.path.basename(exp_res_filename),
                os.path.basename(eval_res_filename), pvalue_mode, delta,
                min_freq, len(trueFIs)))
    sys.stderr.write("orig_size={},exp_size={},eval_size={}\n".format(
        stats['orig_size'], stats['exp_size'], stats['eval_size']))
    sys.stderr.write("exp_res={},exp_res_filtered={},eval_res={}\n".format(
        stats['exp_res'], stats['exp_res_filtered'], stats['eval_res']))
    sys.stderr.write("filter_epsilon={},tfis_from_exp={}\n".format(
        stats['filter_epsilon'], stats['tfis_from_exp']))
    sys.stderr.write(
        "holdout_intersection={},holdout_false_negatives={}\n".format(
            stats['holdout_intersection'], stats['holdout_false_negatives']))
    sys.stderr.write("critical_value={},removed={},epsilon={}\n".format(
        stats['critical_value'], stats['removed'], stats['epsilon']))
    sys.stderr.write(
        "exp_res_file,eval_res_file,pvalue_mode,delta,min_freq,trueFIs,orig_size,exp_size,eval_size,exp_res,exp_res_filtered,eval_res,filter_epsilon,tfis_from_exp,holdout_intersection,holdout_false_negatives,critical_value,removed,epsilon\n"
    )
    sys.stderr.write("{}\n".format(",".join(
        (str(i)
         for i in (os.path.basename(exp_res_filename),
                   os.path.basename(eval_res_filename), pvalue_mode, delta,
                   min_freq, len(trueFIs), stats['orig_size'],
                   stats['exp_size'], stats['eval_size'], stats['exp_res'],
                   stats['exp_res_filtered'], stats['eval_res'],
                   stats['filter_epsilon'], stats['tfis_from_exp'],
                   stats['holdout_intersection'],
                   stats['holdout_false_negatives'], stats['critical_value'],
                   stats['removed'], stats['epsilon'])))))
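
The six positional arguments above are validated by hand; argparse can express the same checks more compactly. A sketch, not part of the original script (argument names mirror the usage string):

import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument('first_epsilon', type=float)
parser.add_argument('delta', type=float)
parser.add_argument('min_freq', type=float)
parser.add_argument('pvalue_mode', type=str.upper, choices=('C', 'E', 'W'))
parser.add_argument('exploreres')
parser.add_argument('evalres')
args = parser.parse_args()
for path in (args.exploreres, args.evalres):
    if not os.path.isfile(path):
        parser.error('{} does not exist, or is not a file'.format(path))
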
Example #32
def main():
    # Verify arguments
    if len(sys.argv) != 7:
        utils.error_exit(
            " ".join((
                "Usage: {}".format(os.path.basename(sys.argv[0])),
                "use_additional_knowledge={{0|1}} delta min_freq mode={{c|e}}",
                "dataset results_filename\n")))
    dataset = sys.argv[5]
    res_filename = sys.argv[6]
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))
    pvalue_mode = sys.argv[4].upper()
    if pvalue_mode != "C" and pvalue_mode != "E" and pvalue_mode != "W":
        utils.error_exit(
            "p-value mode must be 'c', 'e', or 'w'. You passed {}\n".format(
                pvalue_mode))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta,
                                   pvalue_mode, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    sys.stderr.write(
        ",".join(
            ("res_file={}".format(os.path.basename(res_filename)),
             "use_add_knowl={}".format(use_additional_knowledge),
             "pvalue_mode={}".format(pvalue_mode), "d={}".format(delta),
             "min_freq={}".format(min_freq),
             "trueFIs={}\n".format(len(trueFIs)))))
    sys.stderr.write(
        ",".join(
            ("union_bound_factor={}".format(stats['union_bound_factor']),
             "critical_value={}".format(stats['critical_value']),
             "removed={}".format(stats['removed']),
             "epsilon={}\n".format(stats['epsilon']))))
    sys.stderr.write(
        ",".join(
            ("res_file,add_knowl,pvalue_mode,delta,min_freq,trueFIs",
             "union_bound_factor,critical_value,removed,epsilon\n")))
    sys.stderr.write("{}\n".format(
        ",".join(
            (str(i) for i in (os.path.basename(res_filename),
             use_additional_knowledge, pvalue_mode, delta, min_freq,
             len(trueFIs), stats['union_bound_factor'],
             stats['critical_value'], stats['removed'], stats['epsilon'])))))
Example #33
def main(environment, game, level, player_img, use_graph, draw_all_labels,
         draw_dup_labels, draw_path, show_score, process, gen_prolog,
         dimensions, structure, summary, runtime, prolog):

    # Set environment variable
    if environment not in ENVIRONMENTS:
        utils.error_exit(
            "invalid environment - environment must be one of %s" %
            str(ENVIRONMENTS))
    if environment == 'maze':
        os.environ['MAZE'] = "1"

    if dimensions or structure or summary:
        if dimensions:
            print(Level.get_level_dimensions_in_tiles(game, level))
        if structure:
            Level.print_structural_txt(game, level)
        if summary:
            Level.print_tile_summary(game, level)
            Level.print_start_goal_tile_locations(game, level)
            print("Num gaps: %d" % Level.get_num_gaps(game, level))
        exit(0)

    if runtime:
        import json

        all_levels_process_info_file = utils.get_filepath(
            "", "all_levels_process_info.pickle")
        if not os.path.exists(all_levels_process_info_file):
            utils.error_exit("%s file not found" %
                             all_levels_process_info_file)
        all_levels_process_info = utils.read_pickle(
            all_levels_process_info_file)

        cur_game_level = "%s/%s" % (game, level)

        for process_key, process_runtimes in all_levels_process_info.items():
            if process_key == cur_game_level:
                print("----- Process Script Runtimes -----")
                print("Game: %s" % game)
                print("Level: %s" % level)
                print(json.dumps(process_runtimes, indent=2))
                exit(0)

        utils.error_exit("Run 'pypy3 main.py <environment> %s %s --process'" %
                         (game, level))

    if prolog:
        import json

        all_prolog_info_file = utils.get_filepath(
            "level_saved_files_block/prolog_files", "all_prolog_info.pickle")
        if not os.path.exists(all_prolog_info_file):
            utils.error_exit("%s file not found" % all_prolog_info_file)
        all_prolog_info = utils.read_pickle(all_prolog_info_file)

        prolog_exists = all_prolog_info.get(level)
        if prolog_exists:
            print("----- Prolog Info -----")
            print("Game: %s" % game)
            print("Level: %s" % level)
            for key, item in prolog_exists.items():
                print("%s: %s" % (key, str(item)))
            exit(0)

        utils.error_exit(
            "Run 'python main.py <environment> %s %s --gen_prolog'" %
            (game, level))

    if process:
        print("----- Creating Uniform Txt Layer File -----")
        Level.get_uniform_tile_chars(game, level)

        print("---- Processing Level -----")
        print("Game: %s" % game)
        print("Level: %s" % level)

        process_runtimes = []

        import enumerate
        state_graph_file, runtime = enumerate.main(game, level, player_img)
        process_runtimes.append(('enumerate', runtime))

        import extract_metatiles
        unique_metatiles_file, metatile_coords_dict_file, runtime = extract_metatiles.main(
            save_filename=level,
            player_img=player_img,
            print_stats=False,
            state_graph_files=[state_graph_file])
        process_runtimes.append(('extract_metatiles', runtime))

        import get_metatile_id_map
        id_metatile_map_file, metatile_id_map_file, runtime = get_metatile_id_map.main(
            save_filename=level,
            unique_metatiles_file=unique_metatiles_file,
            player_img=player_img)
        process_runtimes.append(('get_metatile_id_map', runtime))

        import get_tile_id_coords_map
        tile_id_extra_info_coords_map_file, runtime = get_tile_id_coords_map.main(
            game, level, metatile_coords_dict_file, metatile_id_map_file,
            player_img)
        process_runtimes.append(('get_tile_id_coords_map', runtime))

        import get_states_per_metatile
        runtime = get_states_per_metatile.main(
            save_filename=level,
            unique_metatiles_file=unique_metatiles_file,
            player_img=player_img,
            print_stats=False)
        process_runtimes.append(('get_states_per_metatile', runtime))

        import extract_constraints
        metatile_constraints_file, runtime = extract_constraints.main(
            save_filename=level,
            metatile_id_map_file=metatile_id_map_file,
            id_metatile_map_file=id_metatile_map_file,
            metatile_coords_dict_files=[metatile_coords_dict_file],
            player_img=player_img)
        process_runtimes.append(('extract_constraints', runtime))

        save_process_runtimes(process_key="%s/%s" % (game, level),
                              process_runtimes=process_runtimes)

    if gen_prolog:
        import gen_prolog
        metatile_constraints_file = "level_saved_files_%s/metatile_constraints/%s.pickle" % (
            player_img, level)
        if not os.path.exists(metatile_constraints_file):
            utils.error_exit(
                "%s file does not exist. Run 'pypy3 main.py %s %s %s --process' first"
                % (metatile_constraints_file, environment, game, level))
        prolog_file, runtime = gen_prolog.main(
            tile_constraints_file=metatile_constraints_file,
            debug=False,
            print_pl=False,
            save=True)
        save_process_runtimes(process_key="%s/%s" % (game, level),
                              process_runtimes=[('gen_prolog', runtime)])

    if not (process or gen_prolog):
        import platformer
        platformer.main(game, level, player_img, use_graph, draw_all_labels,
                        draw_dup_labels, draw_path, show_score)
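
Each processing stage above returns its runtime alongside its outputs. A stage that doesn't can be wrapped with a small timing helper that keeps the same (name, runtime) bookkeeping; a hypothetical sketch, not part of the original:

import time

def timed_stage(name, fn, *args, **kwargs):
    # Run one pipeline stage and return its result together with a
    # (name, elapsed_seconds) pair for the process_runtimes list.
    start = time.time()
    result = fn(*args, **kwargs)
    return result, (name, time.time() - start)

For example, result, entry = timed_stage('enumerate', enumerate.main, game, level, player_img), followed by process_runtimes.append(entry).
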
Example #34
    def generate_video(self, audio, image):
        '''
        Encodes a video file from our audio and image input files.
        '''
        # Check to see if our files exist at all.
        if not (os.path.exists(audio) and os.path.exists(image)):
            error_exit('please specify a valid audio and image file')

        in_image_ext = os.path.splitext(image)[1]
        in_audio_ext = os.path.splitext(audio)[1]

        # Check our MP3/OGG/FLAC/etc file and get its duration.
        probe_cmd = [self.settings['path_ffprobe'], audio]
        try:
            probe_out = subprocess.check_output(probe_cmd,
                                                stderr=subprocess.STDOUT)
            if self.settings['verbose']:
                print(probe_out)
        except (OSError, subprocess.CalledProcessError):
            error_exit('''couldn't probe the audio file \
(ffprobe might not be available)''')

        # Try to extract some metadata from the file using Mutagen.
        try:
            metadata = mutagen.File(audio)
        except AttributeError:
            metadata = []

        # Save a human-readable version of the metadata in the object.
        # Keep the original Mutagen output around too.
        self.settings['metadata'] = {}
        self.settings['orig_metadata'] = metadata
        if metadata is not None:
            for tag in metadata:
                item = metadata[tag]
                # We join the item in case it's still a list, as in the case
                # of Vorbis.
                if isinstance(item, (list, tuple)):
                    item = ''.join(item)
                self.settings['metadata'][self.tunetags.tag_lookup(tag)] = \
                    str(item)

        # Lift the actual track duration string out of the output.
        duration = re.findall('Duration: (.+?),', probe_out)

        # If we get valid output, parse the duration and get a seconds value.
        # Otherwise, stop the script.
        if len(duration):
            duration = duration[0]
        else:
            error_exit('''couldn't parse ffprobe's output. Try again with \
-v (--verbose) to see what went wrong.''')

        # Turn the string into a datetime format.
        try:
            audio_info = datetime.strptime(duration, '%H:%M:%S.%f')
            delta = timedelta(hours=audio_info.hour,
                              minutes=audio_info.minute,
                              seconds=audio_info.second,
                              microseconds=audio_info.microsecond)
        except ValueError:
            error_exit('''encountered an error trying to determine the \
duration of the audio file. It could be in an unrecognized format, or \
longer than 24 hours. (Duration: %s, exception: %s)''' %
                       (duration, sys.exc_info()[0]))

        print('Using image file `%s\', size: %s.' %
              (image, os.path.getsize(image)))
        print('Using audio file `%s\', size: %s, duration: %s.' %
              (audio, os.path.getsize(audio), duration))

        if not self.settings['metadata']:
            print("Couldn't extract audio file tags. Continuing.")
        else:
            print('Extracted %d tag(s) from the audio file.' %
                  len(self.settings['metadata']))

        print('Encoding video file...')

        # Now call ffmpeg and produce the video.
        ffmpeg_cmd = [
            self.settings['path_ffmpeg'],
            # loop the video (picture) for the movie's duration
            '-loop',
            '1',
            # a framerate of 1fps (anything lower won't be accepted by Youtube)
            '-framerate',
            '1:1',
            # one input file is the picture
            '-i',
            image,
            # automatically overwrite on duplicate
            '-y',
        ]
        # Add the audio file.
        if in_audio_ext == '.flac':
            # mp4 doesn't take flac very well, so we'll convert it.
            ffmpeg_cmd.extend([
                # one input file is the audio
                '-i',
                audio,
                # for compatibility with various builds, we'll use MP3
                '-c:a',
                'libmp3lame',
                # high quality CBR is good enough
                '-b:a',
                '320k',
            ])
        else:
            ffmpeg_cmd.extend([
                # one input file is the audio
                '-i',
                audio,
                # only copy the audio, don't re-encode it
                '-c:a',
                'copy',
            ])
        # Add the video encoding options.
        ffmpeg_cmd.extend([
            # use x264 as the video encoder
            '-c:v',
            'libx264',
            # duration of the video
            '-t',
            str(delta.total_seconds()),
            # 4:4:4 chroma subsampling (best quality)
            '-pix_fmt',
            'yuv444p',
            # as fast as possible, at cost of filesize
            # (uploading likely costs less time)
            '-preset',
            'ultrafast',
            # lossless quality
            '-qp',
            '0',
            # output
            self.settings['path_output']
        ])

        try:
            probe_out = subprocess.check_output(ffmpeg_cmd,
                                                stderr=subprocess.STDOUT)
            if self.settings['verbose']:
                print(probe_out)
        except (OSError, subprocess.CalledProcessError):
            error_exit('''encountered an error trying to generate the video. \
Try again with -v (--verbose) to see what went wrong. \
(Exception: %s)''' % sys.exc_info()[0])

        print('Successfully generated the file `%s\'.' %
              self.settings['path_output'])
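
The codec selection above reduces to a single rule: FLAC is transcoded to 320 kbps MP3 because MP4 containers handle FLAC poorly, and everything else is stream-copied. Factored into a helper, under the same assumptions (a sketch, not part of the original):

def audio_codec_args(in_audio_ext):
    # FLAC is transcoded to high-bitrate MP3 for MP4 compatibility;
    # other formats are stream-copied without re-encoding.
    if in_audio_ext == '.flac':
        return ['-c:a', 'libmp3lame', '-b:a', '320k']
    return ['-c:a', 'copy']
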
Example #35
def get_trueFIs(exp_res_filename, eval_res_filename, min_freq, delta,
        pvalue_mode, do_filter=0):
    """ Compute the True Frequent Itemsets using the holdout method.

    The holdout method is described in Geoffrey I. Webb, "Discovering
    significant patterns" in Machine Learning, Vol. 68, Issue (1), pp. 1-3,
    2007.

    The dataset is split in two parts, an exploratory part and an evaluation
    part. Each are mined separately at frequency 'min_freq'. The results are
    contained in 'exp_res_filename' and 'eval_res_filename' respectively.
    The parameter 'do_filter' controls a variant of the algorithm where the
    results from the exploratory part are filtered more.

    The p-values for the Binomial tests are computed using the mode specified
    by pvalue_mode: 'c' for Chernoff, 'e' for exact, or 'w' for weak Chernoff.

    Returns a pair (trueFIs, stats).
    'trueFIs' is a dict whose keys are itemsets (frozensets) and values are
    frequencies. This collection of itemsets contains only TFIs with
    probability at least 1 - delta.
    'stats' is a dict containing various statistics used in computing the
    collection of itemsets."""

    stats = dict()

    with open(exp_res_filename) as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            utils.error_exit(
                " ".join(
                    ("Cannot compute size of the explore dataset:",
                        "'{}' is not in a recognized format\n".format(
                            size_line))))
        try:
            stats['exp_size'] = int(size_str)
        except ValueError:
            utils.error_exit(
                " ".join(
                    ("Cannot compute size of the explore dataset:",
                     "'{}' is not a number\n".format(size_str))))

    with open(eval_res_filename) as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            utils.error_exit(
                " ".join(
                    ("Cannot compute size of the eval dataset:",
                     "'{}' is not in a recognized format\n".format(
                         size_line))))
        try:
            stats['eval_size'] = int(size_str)
        except ValueError:
            utils.error_exit(
                " ".join(
                    ("Cannot compute size of the eval dataset:",
                     "'{}' is not a number\n".format(size_str))))

    stats['orig_size'] = stats['exp_size'] + stats['eval_size']

    exp_res = utils.create_results(exp_res_filename, min_freq)
    stats['exp_res'] = len(exp_res)

    trueFIs = dict()

    supposed_freq = (math.ceil(stats['orig_size'] * min_freq) - 1) / stats['orig_size']
    stats['filter_critical_value'] = 0
    if do_filter > 0:
        stats['lowered_delta'] = 1 - math.sqrt(1 - delta)
        exp_res_filtered = dict()
        stats['filter_critical_value'] = math.log(stats['lowered_delta']) - do_filter
        last_accepted_freq = 1.0
        last_non_accepted_freq = 0.0
        for itemset in exp_res:
            if utils.pvalue(pvalue_mode, exp_res[itemset], stats['exp_size'],
                    supposed_freq) <= stats['filter_critical_value']:
                trueFIs[itemset] = exp_res[itemset]
                if exp_res[itemset] < last_accepted_freq:
                    last_accepted_freq = exp_res[itemset]
            else:
                exp_res_filtered[itemset] = exp_res[itemset]
                if exp_res[itemset] > last_non_accepted_freq:
                    last_non_accepted_freq = exp_res[itemset]
        # Compute epsilon for the binomial
        min_diff = 5e-6 # controls when to stop the binary search
        while last_accepted_freq - last_non_accepted_freq > min_diff:
            mid_point = (last_accepted_freq - last_non_accepted_freq) / 2
            test_freq = last_non_accepted_freq + mid_point
            p_value = utils.pvalue(pvalue_mode, test_freq,
                    stats['eval_size'], supposed_freq)
            if p_value <= stats['filter_critical_value']:
                last_accepted_freq = test_freq
            else:
                last_non_accepted_freq = test_freq
        stats['filter_epsilon'] = last_non_accepted_freq + (
            (last_accepted_freq - last_non_accepted_freq) / 2) - min_freq
    else:
        stats['lowered_delta'] = delta
        exp_res_filtered = exp_res
        stats['filter_epsilon'] = 1.0
    exp_res_filtered_set = set(exp_res_filtered.keys())
    stats['exp_res_filtered'] = len(exp_res_filtered_set)
    stats['tfis_from_exp'] = len(trueFIs)
    sys.stderr.write("do_filter: {}, tfis_from_exp: {}, exp_res_filtered: {}\n".format(do_filter, stats['tfis_from_exp'], stats['exp_res_filtered']))

    if stats['exp_res_filtered'] > 0:
        eval_res = utils.create_results(eval_res_filename, min_freq)
        eval_res_set = set(eval_res.keys())
        stats['eval_res'] = len(eval_res)

        intersection = exp_res_filtered_set & eval_res_set
        stats['holdout_intersection'] = len(intersection)
        stats['holdout_false_negatives'] = len(exp_res_filtered_set - eval_res_set)

        # Bonferroni correction (Union bound). We work in the log space.
        stats['critical_value'] = math.log(stats['lowered_delta']) - math.log(stats['exp_res_filtered'])

        # Add TFIs from eval
        last_accepted_freq = 1.0
        last_non_accepted_freq = min_freq
        for itemset in sorted(intersection, key=lambda x: eval_res[x], reverse=True):
            p_value = utils.pvalue(pvalue_mode, eval_res[itemset],
                    stats['eval_size'], supposed_freq)
            if p_value <= stats['critical_value']:
                trueFIs[itemset] = eval_res[itemset]
                last_accepted_freq = eval_res[itemset]
            else:
                last_non_accepted_freq = eval_res[itemset]
                break

        # Compute epsilon for the binomial
        min_diff = 5e-6 # controls when to stop the binary search
        while last_accepted_freq - last_non_accepted_freq > min_diff:
            mid_point = (last_accepted_freq - last_non_accepted_freq) / 2
            test_freq = last_non_accepted_freq + mid_point
            p_value = utils.pvalue(pvalue_mode, test_freq,
                    stats['eval_size'], supposed_freq)
            if p_value <= stats['critical_value']:
                last_accepted_freq = test_freq
            else:
                last_non_accepted_freq = test_freq

        stats['epsilon'] = last_non_accepted_freq + ((last_accepted_freq -
            last_non_accepted_freq) / 2) - min_freq
        stats['removed'] = len(intersection) - len(trueFIs)
    else: # stats['exp_res_filtered'] == 0
        stats['eval_res'] = 0
        stats['holdout_false_negatives'] = 0
        stats['holdout_intersection'] = 0
        stats['critical_value'] = 0
        stats['epsilon'] = 0
        stats['removed'] = 0

    return (trueFIs, stats)
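Both epsilon computations above perform the same bisection between the last accepted and last rejected frequency. A condensed sketch of that search (pvalue_at is a hypothetical closure over utils.pvalue with the sample size and supposed_freq fixed):

def critical_frequency(pvalue_at, critical_value, lo, hi, min_diff=5e-6):
    # Invariant: frequencies at hi are accepted (p-value <= critical
    # value), frequencies at lo are not.
    while hi - lo > min_diff:
        mid = lo + (hi - lo) / 2
        if pvalue_at(mid) <= critical_value:
            hi = mid  # mid still accepted: move the upper bound down
        else:
            lo = mid  # mid rejected: move the lower bound up
    return lo + (hi - lo) / 2

The epsilon statistics are then this crossing point minus min_freq.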
Example #36
0
def main():
    # Verify arguments
    if len(sys.argv) != 8:
        utils.error_exit(" ".join(
            ("Usage: {}".format(os.path.basename(sys.argv[0])),
             "vcdim first_epsilon delta min_freq gap exploreres",
             "evalres\n")))
    exp_res_filename = sys.argv[6]
    if not os.path.isfile(exp_res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(exp_res_filename))
    eval_res_filename = sys.argv[7]
    if not os.path.isfile(eval_res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(eval_res_filename))
    try:
        vcdim = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        first_epsilon = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        delta = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))
    try:
        min_freq = float(sys.argv[4])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[4]))
    try:
        gap = float(sys.argv[5])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[5]))

    (trueFIs, stats) = get_trueFIs(exp_res_filename, eval_res_filename,
                                   min_freq, delta, gap, first_epsilon, vcdim)

    utils.print_itemsets(trueFIs, stats['orig_size'])

    sys.stderr.write(",".join(
        ("exp_res_file={}".format(os.path.basename(exp_res_filename)),
         "eval_res_file={}".format(os.path.basename(eval_res_filename)),
         "d={}".format(delta), "min_freq={}".format(min_freq),
         "trueFIs={}\n".format(len(trueFIs)))))
    sys.stderr.write("orig_size={},exp_size={},eval_size={}\n".format(
        stats['orig_size'], stats['exp_size'], stats['eval_size']))
    sys.stderr.write("exp_res={},exp_res_filtered={},eval_res={}\n".format(
        stats['exp_res'], stats['exp_res_filtered'], stats['eval_res']))
    sys.stderr.write(",".join(
        ("holdout_intersection={}".format(stats['holdout_intersection']),
         "holdout_false_positives={}".format(stats['holdout_false_positives']),
         "holdout_false_negatives={}".format(stats['holdout_false_negatives']),
         "holdout_jaccard={}\n".format(stats['holdout_jaccard']))))
    sys.stderr.write("e1={},e2={},vcdim={}\n".format(stats['epsilon_1'],
                                                     stats['epsilon_2'],
                                                     stats['vcdim']))
    sys.stderr.write(",".join(
        ("exp_res_file,eval_res_file,delta,min_freq,trueFIs",
         "orig_size,exp_size,eval_size,exp_res,eval_res",
         "holdout_intersection,holdout_false_positives",
         "holdout_false_negatives,holdout_jaccard,e1,e2,vcdim\n")))
    sys.stderr.write("{}\n".format(",".join(
        (str(i)
         for i in (os.path.basename(exp_res_filename),
                   os.path.basename(eval_res_filename), delta, min_freq,
                   len(trueFIs), stats['orig_size'], stats['exp_size'],
                   stats['eval_size'], stats['exp_res'], stats['eval_res'],
                   stats['holdout_intersection'],
                   stats['holdout_false_positives'],
                   stats['holdout_false_negatives'], stats['holdout_jaccard'],
                   stats['epsilon_1'], stats['epsilon_2'], stats['vcdim'])))))
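The repeated try/except blocks in main() could be condensed into a small helper; a sketch (parse_number_or_exit is hypothetical, utils.error_exit as above):

def parse_number_or_exit(value, cast):
    # Parse a command-line token as a number or abort with an error.
    try:
        return cast(value)
    except ValueError:
        utils.error_exit("{} is not a number\n".format(value))

# e.g. delta = parse_number_or_exit(sys.argv[3], float)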
Example #37
0
import argparse

from utils import read_config, error_exit
import local  # Beam flow with local batch processing
import gcloud  # Beam flow with stream processing using GC features

# --------------------------------------------------------------------
# Testing Beam...

if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Running the selected demo beam pipeline')
    parser.add_argument('--cfgfile',
                        default='config.json',
                        help='Config file name with path')
    parser.add_argument('--pipeline', help='Pipeline to run: local | gcp')
    args = parser.parse_args()

    cfg = read_config(args.cfgfile)

    if args.pipeline == 'local':
        local.run_pipeline(cfg)
    elif args.pipeline == 'gcp':
        gcloud.run_pipeline(cfg)
    else:
        error_exit(
            'Invalid option for argument --pipeline.\nValid options: local | gcp'
        )

    print('\nNormal program termination.\n')
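An equivalent, easily extended alternative to the if/elif dispatch above is a dict mapping option values to pipeline runners (a sketch reusing the imports above):

PIPELINES = {'local': local.run_pipeline, 'gcp': gcloud.run_pipeline}

runner = PIPELINES.get(args.pipeline)
if runner is None:
    error_exit('Invalid option for argument --pipeline.\n'
               'Valid options: %s' % ' | '.join(PIPELINES))
runner(cfg)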
Example #38
0
    save_hr_chunks = False
    debug = True

    # Set display dimensions to training level dimensions
    if chunk_width is None or chunk_height is None:  # no alt chunk size specified
        display_w = None
        display_h = None
        all_levels_info_file = "../platformer/level_saved_files_block/all_levels_info.json"
        levels = read_json(all_levels_info_file).get('contents')
        for level_info in levels:
            if level_info.get('level_name') == training_level:
                display_w = level_info.get('level_width')
                display_h = level_info.get('level_height')
                break
        if display_w is None or display_h is None:
            error_exit("%s level not found in all_levels_info.json" %
                       training_level)

        chunk_width = int(display_w / TILE_DIM)
        chunk_height = int(display_h / TILE_DIM)

    else:
        display_w = chunk_width * TILE_DIM
        display_h = chunk_height * TILE_DIM

    tileset = "tilesets/platformer/%s.json" % training_level
    scale_w = 1
    scale_h = 1

    command_str = "python %s.py --tileset %s --display_width %d --display_height %d " \
                  "--scale_width %d --scale_height %d --chunk_width %d --chunk_height %d --failureMillis %d" % \
                  (program, tileset, display_w, display_h, scale_w, scale_h, chunk_width, chunk_height,
Example #39
0
def main(trial, levels, num_sol, asp, state_graph):

    if not (asp or state_graph):
        utils.error_exit(
            "Must specify at least one validation test to run: --asp or --state_graph"
        )

    # Get file formats
    config_formats = TRIAL_CONFIG_FORMATS.get(trial)
    if config_formats is None:
        utils.error_exit("--trial must be one of %s" %
                         str(list(TRIAL_CONFIG_FORMATS.keys())))
    prolog_file_format = "level_saved_files_block/prolog_files/%s.pl"
    model_str_file_format = "level_saved_files_block/generated_level_model_strs/%s.txt"
    assignments_dict_file_format = "level_saved_files_block/generated_level_assignments_dicts/%s.pickle"

    # Initialize validation counts
    asp_checked_count = 0
    asp_valid_count = 0
    state_graph_checked_count = 0
    state_graph_valid_count = 0

    # Validate each solver run
    for level in levels:
        for config_file_format in config_formats:
            for sol in range(num_sol):
                prolog_file = prolog_file_format % level
                prolog_filename = utils.get_basepath_filename(
                    prolog_file, 'pl')
                config_file = config_file_format % level
                config_filename = utils.get_basepath_filename(
                    config_file, 'json')
                answer_set_filename = '_'.join(
                    [prolog_filename, config_filename,
                     'a%d' % sol])

                if asp:
                    # Determine ASP checks to perform based on config file contents
                    config_file_contents = utils.read_json(config_file)
                    config = config_file_contents['config']
                    require_all_platforms_reachable = True
                    require_all_bonus_tiles_reachable = True
                    if config.get(
                            'require_all_platforms_reachable') is not None:
                        require_all_platforms_reachable = eval(
                            config['require_all_platforms_reachable'])
                    if config.get(
                            'require_all_bonus_tiles_reachable') is not None:
                        require_all_bonus_tiles_reachable = eval(
                            config['require_all_bonus_tiles_reachable'])

                    prolog_file_info = get_prolog_file_info(prolog_file)
                    tile_ids = get_tile_ids_dictionary(prolog_file_info)
                    model_str_file = model_str_file_format % answer_set_filename

                    if os.path.exists(model_str_file):
                        model_str = utils.read_txt(model_str_file)
                        asp_valid = Solver.asp_is_valid(
                            check_path=True,
                            check_onground=require_all_platforms_reachable,
                            check_bonus=require_all_bonus_tiles_reachable,
                            model_str=model_str,
                            player_img='block',
                            answer_set_filename=answer_set_filename,
                            tile_ids=tile_ids,
                            save=False)
                        status = "ASP VALID" if asp_valid else "ASP INVALID"
                        print("%s: %s" % (answer_set_filename, status))
                        asp_checked_count += 1
                        asp_valid_count += 1 if asp_valid else 0

                if state_graph:
                    assignments_dict_file = assignments_dict_file_format % answer_set_filename
                    if os.path.exists(assignments_dict_file):
                        assignments_dict = utils.read_pickle(
                            assignments_dict_file)
                        valid_path = Solver.get_state_graph_valid_path(
                            assignments_dict=assignments_dict,
                            player_img='block',
                            prolog_filename=prolog_filename,
                            answer_set_filename=answer_set_filename,
                            save=True)
                        status = "GRAPH VALID" if valid_path else "GRAPH INVALID"
                        print("%s: %s" % (answer_set_filename, status))
                        state_graph_checked_count += 1
                        state_graph_valid_count += 1 if valid_path is not None else 0

    # Print validation results summary
    if asp:
        print("ASPs Checked: %d" % asp_checked_count)
        print("ASPs Valid: %d" % asp_valid_count)

    if state_graph:
        print("State Graphs Checked: %d" % state_graph_checked_count)
        print("State Graphs Valid: %d" % state_graph_valid_count)
Example #40
0
def main(game, level, player_img, use_graph, draw_all_labels, draw_dup_labels, draw_path, show_score):

    # Create the Level
    level_obj = Level.generate_level_from_file(game, level)

    # Level saved files
    state_graph_file = "level_saved_files_%s/enumerated_state_graphs/%s/%s.gpickle" % (player_img, game, level)

    if game == "generated" and os.path.exists("level_saved_files_%s/generated_level_paths/%s.pickle" % (player_img, level)):
        generated_level_path_coords = read_pickle("level_saved_files_%s/generated_level_paths/%s.pickle" % (player_img, level))
    else:
        generated_level_path_coords = None

    if use_graph and os.path.exists(state_graph_file):
        print("***** USING ENUMERATED STATE GRAPH *****")
        state_graph = nx.read_gpickle(state_graph_file)
    else:
        print("***** USING MANUAL CONTROLS *****")
        state_graph = None

    edge_actions_dict = None if state_graph is None else nx.get_edge_attributes(state_graph, 'action')

    # Background
    FPS = 40  # frame rate
    ANI = 4  # animation cycles
    WORLD_X = min(level_obj.width, MAX_WIDTH)
    WORLD_Y = min(level_obj.height, MAX_HEIGHT)
    clock = pygame.time.Clock()
    pygame.init()
    world = pygame.display.set_mode([WORLD_X, WORLD_Y])
    BACKGROUND_COLOR = COLORS.get('DARK_GRAY')

    # Player
    player_model = Player(player_img, level_obj)
    player_view = PlayerView(player_img)
    player_list = pygame.sprite.Group()
    player_list.add(player_view)

    # Level
    platform_sprites = get_sprites(level_obj.get_platform_coords(), 'block_tile.png')
    goal_sprites = get_sprites(level_obj.get_goal_coords(), 'goal_tile.png')
    bonus_sprites = get_sprites(level_obj.get_bonus_coords(), 'bonus_tile.png')
    one_way_platform_sprites = get_sprites(level_obj.get_one_way_platform_coords(), 'one_way_block_tile.png')
    hazard_sprites = get_sprites(level_obj.get_hazard_coords(), 'hazard_tile.png')
    wall_sprites = get_sprites(level_obj.get_wall_coords(), 'block_tile.png')
    collected_bonus_tile_coords_dict = {}

    # Camera
    camera = Camera(Camera.camera_function, level_obj.width, level_obj.height, WORLD_X, WORLD_Y)

    # Setup drawing metatile labels
    if draw_all_labels or draw_dup_labels:
        metatile_labels, font_color, label_padding = \
            setup_metatile_labels(game, level, player_img, draw_all_labels, draw_dup_labels)

    # Setup drawing solution path
    if draw_path:
        path_font_color = COLORS.get('GREEN')
        start_font_color = COLORS.get('BLUE')
        goal_font_color = COLORS.get('RED')

        if generated_level_path_coords is not None:
            path_coords = generated_level_path_coords
            start_coord = generated_level_path_coords[0]
            goal_coord = generated_level_path_coords[-1]

        elif os.path.exists(state_graph_file):
            graph = nx.read_gpickle(state_graph_file)
            shortest_path_dict = shortest_path_xy(graph)
            path_coords = shortest_path_dict.get("path_coords")
            start_coord = shortest_path_dict.get("start_coord")
            goal_coord = shortest_path_dict.get("goal_coord")

        else:
            error_exit("No enumerated state graph available to draw solution path")

    # Input handling
    input_handler = Inputs()

    # Main Loop
    main = True

    while main:
        input_handler.onLoop()
        
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                main = False

            if event.type == pygame.KEYDOWN:
                if event.key == ord('q'):
                    pygame.quit()
                    main = False
                    sys.exit()
                elif event.key == ord('r'):
                    player_model.reset()
                    collected_bonus_tile_coords_dict = {}
                    platform_sprites = get_sprites(level_obj.get_platform_coords(), 'block_tile.png')

            input_handler.onEvent(event)

        if not main:
            break

        world.fill(BACKGROUND_COLOR)
        camera.update(player_view)  # set camera to track player

        # Update Player model and view
        player_model.update(action=input_handler.getAction(),
                            precomputed_graph=state_graph, edge_actions_dict=edge_actions_dict)

        player_view.update(player_model.state.x, player_model.state.y,
                           player_model.half_player_w, player_model.half_player_h)

        # Update the current score
        hit_bonus_coord = player_model.get_hit_bonus_coord()

        if hit_bonus_coord != '':
            hit_bonus_coord_x = player_model.state.x // TILE_DIM
            hit_bonus_coord_y = player_model.state.y // TILE_DIM - 1
            if hit_bonus_coord == 'N':
                pass
            elif hit_bonus_coord == 'NE':
                hit_bonus_coord_x += 1
            elif hit_bonus_coord == 'NW':
                hit_bonus_coord_x -= 1
            else:
                error_exit("unrecognized hit bonus coord")

            hit_bonus_coord_xy = (hit_bonus_coord_x * TILE_DIM, hit_bonus_coord_y * TILE_DIM)

            if hit_bonus_coord_xy not in level_obj.get_bonus_coords():
                error_exit("hit bonus tile that is not there: " + str(hit_bonus_coord_xy))

            if collected_bonus_tile_coords_dict.get(hit_bonus_coord_xy) is None:
                collected_bonus_tile_coords_dict[hit_bonus_coord_xy] = 1
                platform_sprites.add(Tile(hit_bonus_coord_xy[0], hit_bonus_coord_xy[1], 'block_tile.png'))

        score = len(collected_bonus_tile_coords_dict) * 10

        # Draw sprites
        entities_to_draw = []
        entities_to_draw += list(bonus_sprites)  # draw bonus tiles
        entities_to_draw += list(platform_sprites)  # draw platforms tiles
        entities_to_draw += list(one_way_platform_sprites)  # draw one-way platform tiles
        entities_to_draw += list(hazard_sprites)
        entities_to_draw += list(wall_sprites)
        entities_to_draw += list(player_list)  # draw player
        entities_to_draw += list(goal_sprites)  # draw goal tiles

        for e in entities_to_draw:
            world.blit(e.image, camera.apply(e))

        # Draw metatile labels
        if draw_all_labels or draw_dup_labels:
            for coord in level_obj.get_all_possible_coords():  # draw metatile border outlines
                tile_rect = pygame.Rect(coord[0], coord[1], TILE_DIM, TILE_DIM)
                tile_rect = camera.apply_to_rect(tile_rect)  # adjust based on camera
                pygame.draw.rect(world, font_color, tile_rect, 1)

            for label in metatile_labels:  # draw metatile labels
                surface, label_x, label_y = label
                label_x, label_y = camera.apply_to_coord((label_x, label_y))
                world.blit(surface, (label_x + label_padding[0], label_y + label_padding[1]))

        # Draw level solution path
        if draw_path:
            for coord in path_coords:
                if coord == start_coord:
                    color = start_font_color
                elif coord == goal_coord:
                    color = goal_font_color
                else:
                    color = path_font_color
                coord = eval(coord)
                path_component = pygame.Rect(coord[0], coord[1], 2, 2)
                path_component = camera.apply_to_rect(path_component)
                pygame.draw.rect(world, color, path_component, 1)

        # Draw text labels
        label_rect_pairs = []
        if player_model.goal_reached():
            score += 50
            labels = [
                ("You Win!", 50, COLORS.get('GREEN')),
                ("Score: %d" % score, 30, COLORS.get('YELLOW')),
                ("Press 'R' to replay or 'Q' to quit", 30, COLORS.get('YELLOW'))
            ]
            label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X/2, labels=labels)

        elif player_model.is_dead():
            labels = [
                ("Game Over", 50, COLORS.get('RED')),
                ("Score: %d" % score, 30, COLORS.get('YELLOW')),
                ("Press 'R' to replay or 'Q' to quit", 30, COLORS.get('YELLOW'))
            ]
            label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X / 2, labels=labels)

        elif show_score:
            labels = [("Score: %d" % score, 50, COLORS.get('YELLOW'))]
            label_rect_pairs = get_label_rect_pairs(center_x=WORLD_X / 2, labels=labels)

        for label, label_rect in label_rect_pairs:
            world.blit(label, label_rect)

        pygame.display.flip()
        clock.tick(FPS)
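The bonus-tile arithmetic above can be isolated into a small helper for clarity (a sketch; tile_dim corresponds to TILE_DIM):

def hit_bonus_tile_xy(player_x, player_y, direction, tile_dim):
    # The hit tile sits one row above the player, shifted one column
    # to the right (NE) or left (NW).
    tile_x = player_x // tile_dim
    tile_y = player_y // tile_dim - 1
    tile_x += {'N': 0, 'NE': 1, 'NW': -1}[direction]
    return (tile_x * tile_dim, tile_y * tile_dim)

# e.g. with 40-pixel tiles, a player at (85, 130) hitting 'NE':
# hit_bonus_tile_xy(85, 130, 'NE', 40) == (120, 80)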
Example #41
0
def main():
    if len(sys.argv) != 7:
        utils.error_exit(" ".join(
            ("USAGE: {}".format(os.path.basename(sys.argv[0])),
             "use_additional_knowledge={{0|1}} delta min_freq gap dataset",
             "results_filename\n")))
    dataset = sys.argv[5]
    res_filename = os.path.expanduser(sys.argv[6])
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))
    try:
        gap = float(sys.argv[4])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[4]))

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq, delta,
                                   gap, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    sys.stderr.write(",".join(
        ("res_file={}".format(os.path.basename(res_filename)),
         "use_add_knowl={}".format(use_additional_knowledge),
         "e1={},e2={}".format(stats['epsilon_1'],
                              stats['epsilon_2']), "d={}".format(delta),
         "min_freq={},trueFIs={}\n".format(min_freq, len(trueFIs)))))
    sys.stderr.write(",".join(
        ("base_set={}".format(stats['base_set']),
         "maximal_itemsets={}".format(stats['maximal_itemsets']),
         "negbor={}".format(stats['negative_border']),
         "emp_vc_dim={}".format(stats['emp_vc_dim']),
         "not_emp_vc_dim={}\n".format(stats['not_emp_vc_dim']))))
    sys.stderr.write(",".join(
        ("res_file,add_knowl,e1,e2,delta,min_freq,trueFIs",
         "base_set,maximal_itemsets,negative_border,emp_vc_dim",
         "not_emp_vc_dim\n")))
    sys.stderr.write("{}\n".format(",".join(
        (str(i)
         for i in (os.path.basename(res_filename), use_additional_knowledge,
                   stats['epsilon_1'], stats['epsilon_2'], delta, min_freq,
                   len(trueFIs), stats['base_set'], stats['maximal_itemsets'],
                   stats['negative_border'], stats['emp_vc_dim'],
                   stats['not_emp_vc_dim'])))))
Example #42
0
def install_riemann():
    langohr_source_url = ctx.node.properties['langohr_jar_source_url']
    daemonize_source_url = ctx.node.properties['daemonize_rpm_source_url']
    riemann_source_url = ctx.node.properties['riemann_rpm_source_url']
    # Needed for Riemann's config
    cloudify_resources_url = ctx.node.properties['cloudify_resources_url']
    rabbitmq_username = ctx.node.properties['rabbitmq_username']
    rabbitmq_password = ctx.node.properties['rabbitmq_password']

    riemann_config_path = '/etc/riemann'
    riemann_log_path = '/var/log/cloudify/riemann'
    langohr_home = '/opt/lib'
    extra_classpath = '{0}/langohr.jar'.format(langohr_home)

    # Confirm username and password have been supplied for broker before
    # continuing.
    # Components other than logstash and riemann have this handled in code.
    # Note that these are not directly used in this script, but are used by the
    # deployed resources, hence the check here.
    if not rabbitmq_username or not rabbitmq_password:
        utils.error_exit(
            'Both rabbitmq_username and rabbitmq_password must be supplied '
            'and at least 1 character long in the manager blueprint inputs.')

    ctx.instance.runtime_properties['rabbitmq_endpoint_ip'] = \
        utils.get_rabbitmq_endpoint_ip()

    ctx.logger.info('Installing Riemann...')
    utils.set_selinux_permissive()

    utils.copy_notice('riemann')
    utils.mkdir(riemann_log_path)
    utils.mkdir(langohr_home)
    utils.mkdir(riemann_config_path)
    utils.mkdir('{0}/conf.d'.format(riemann_config_path))

    langohr = utils.download_cloudify_resource(langohr_source_url)
    utils.sudo(['cp', langohr, extra_classpath])
    ctx.logger.info('Applying Langohr permissions...')
    utils.sudo(['chmod', '644', extra_classpath])
    utils.yum_install(daemonize_source_url)
    utils.yum_install(riemann_source_url)

    utils.logrotate('riemann')

    ctx.logger.info('Downloading cloudify-manager Repository...')
    manager_repo = utils.download_cloudify_resource(cloudify_resources_url)
    ctx.logger.info('Extracting Manager Repository...')
    utils.untar(manager_repo, '/tmp')
    ctx.logger.info('Deploying Riemann manager.config...')
    utils.move(
        '/tmp/plugins/riemann-controller/riemann_controller/resources/manager.config',  # NOQA
        '{0}/conf.d/manager.config'.format(riemann_config_path))

    ctx.logger.info('Deploying Riemann conf...')
    utils.deploy_blueprint_resource('{0}/main.clj'.format(CONFIG_PATH),
                                    '{0}/main.clj'.format(riemann_config_path))

    # our riemann configuration will (by default) try to read these environment
    # variables. If they don't exist, it will assume
    # that they're found at "localhost"
    # export MANAGEMENT_IP=""
    # export RABBITMQ_HOST=""

    # we inject the management_ip for both of these to Riemann's systemd
    # config.
    # These should be potentially different
    # if the manager and rabbitmq are running on different hosts.
    utils.systemd.configure('riemann')
    utils.clean_var_log_dir('riemann')
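The fallback described in the comments amounts to the following lookup (an illustration in Python; the actual Riemann configuration is Clojure):

import os

management_ip = os.environ.get('MANAGEMENT_IP', 'localhost')
rabbitmq_host = os.environ.get('RABBITMQ_HOST', 'localhost')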
Example #43
0
def build_processor(arguments):
    splitter = re.compile(DEFAULT_REGEXP_SPLITTER)

    group_by_list = arguments['--group-by']
    arguments['--list-group-by'] = list()
    arguments['--list-group-by-type'] = list()
    for arg in group_by_list.split(','):
        group_by_match = splitter.match(arg)
        if group_by_match is None:
            error_exit('incorrect item group-by of fields data "%s"' % arg)
        group_by_element = group_by_match.groupdict()
        arguments['--list-group-by'].append(group_by_element['key'])
        arguments['--list-group-by-type'].append(group_by_element['value'])
    arguments['--group-by'] = ','.join(arguments['--list-group-by'])
    arguments['--group-by-type'] = ','.join(arguments['--list-group-by-type'])

    uni_count_match = splitter.match(arguments['--uni-count'])
    if uni_count_match is None:
        error_exit('incorrect item uni-count of fields data "%s"' % arguments['--uni-count'])
    uni_count_element = uni_count_match.groupdict()
    arguments['--uni-count'] = uni_count_element['key']
    arguments['--uni-count-type'] = uni_count_element['value']

    fields = arguments['<var>']
    if arguments['print']:
        label = ', '.join(fields.keys()) + ':'
        selections = ', '.join(fields.keys())
        query = 'select %s from log group by %s' % (selections, selections)
        report_queries = [(label, query)]
    elif arguments['top']:
        limit = int(arguments['--limit'])
        report_queries = []
        for var in fields.keys():
            label = 'top %s' % var
            query = 'select %s, count(1) as count from log group by %s order by count desc limit %d' % (var, var, limit)
            report_queries.append((label, query))
    elif arguments['avg']:
        label = 'average %s' % ', '.join(fields.keys())
        selections = ', '.join('avg(%s)' % var for var in fields.keys())
        query = 'select %s from log' % selections
        report_queries = [(label, query)]
    elif arguments['sum']:
        label = 'sum %s' % ', '.join(fields.keys())
        selections = ', '.join('sum(%s)' % var for var in fields.keys())
        query = 'select %s from log' % selections
        report_queries = [(label, query)]
    elif arguments['query']:
        report_queries = arguments['<query>']
        fields = arguments['<fields>']
    else:
        report_queries = [(name, query % arguments) for name, query in DEFAULT_QUERIES]
        fields = dict(DEFAULT_FIELDS, **dict(zip(arguments['--list-group-by'], arguments['--list-group-by-type'])))
        fields[arguments['--uni-count']] = arguments['--uni-count-type']

    for label, query in report_queries:
        logging.info('query for "%s":\n %s', label, query)

    auto_rotate = dict()
    limit_time = int(arguments['--auto-rotate'])
    auto_rotate['enabled'] = bool(limit_time)
    auto_rotate['interval'] = DEFAULT_LIMIT_TIME if limit_time < 0 or limit_time > 2592000 else limit_time
    auto_rotate['last_timestamp'] = ''
    if auto_rotate['enabled']:
        auto_rotate['get_last_ts_query'] = 'select max(time_local) from log'
        auto_rotate['delete_old_rows_query'] = 'delete from log where datetime(time_local) < ' \
                                               'datetime(:last_timestamp, "-" || :interval || " seconds")'
        logging.info('query for select last timestamp: %s', auto_rotate['get_last_ts_query'])
        logging.info('query for delete old rows: %s', auto_rotate['delete_old_rows_query'])

    if not arguments['--time-rpl-expr']:
        arguments['--time-rpl-expr'] = DEFAULT_REPLACE_EXPRESSION

    processor_fields = dict()
    if type(fields) is str:
        for field in fields:
            items = field.split(',')
            for item in items:
                fields_element = splitter.match(item).groupdict()
                if fields_element is None:
                    error_exit('failed parsing of field data "%s"' % item)
                processor_fields[fields_element['key']] = fields_element['value']
    elif type(fields) is dict:
        processor_fields = fields
    else:
        error_exit('incorrect type of fields data "%s"' % str(type(fields)))

    processor = SQLProcessor(report_queries, processor_fields, auto_rotate)
    return processor
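DEFAULT_REGEXP_SPLITTER is defined elsewhere in the module; whatever its exact pattern, it must expose 'key' and 'value' named groups for the parsing above to work. A hypothetical stand-in:

import re

splitter = re.compile(r'(?P<key>\w+):(?P<value>\w+)')
match = splitter.match('status:str')
print(match.groupdict())  # {'key': 'status', 'value': 'str'}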
Example #44
0
def create_project(project_name, default, helloworld, api, spa):
    """
    This function is responsible for interacting user with file creation.
    Args:
        project_name (string): This is the project name that will be used for project creation.
        default (bool): This flag says if the project should use the default boilerplate structure.
        helloworld (bool): This flag says if the project is a minimal hello-world app (no venv is created).
        api (bool): This flag says if project is going to have api-like boilerplate structure.
        spa (bool): This flag says if project is going to have spa-like boilerplate structure.

    Raises:
        FileExistsError: If project_name param has the same value as some of the directories in the current directory.

    """
    # getting arguments and options from the locals() function
    options = locals()
    # project_name is removed since we want to browse through options and project_name isn't necessary
    options.pop('project_name')

    # if none of the options was selected, fall back to default
    if list(options.values()).count(True) == 0:
        options['default'] = True

    # seeing if more than one option was selected
    elif list(options.values()).count(True) > 1:
        error_exit("Please make sure only 1 option is selected and try again.")

    # seeing if project_name matches any of directories in the current directory
    try:
        create_folder(project_name)

    except FileExistsError:
        error_exit(
            'That directory already exists. Please check your project name and try again.'
        )

    # printing when project creation is starting
    click.echo(NEWLINE + 'Creating a new Flask app in ' +
               colored(f'~/{project_name}', 'green') + '.')
    click.echo(NEWLINE)

    # create venv if helloworld option is not selected
    if not helloworld:
        create_venv(f'./{project_name}/venv/')

    # deciding which boilerplate to choose and creating it based on argument choice
    base_dir = os.path.dirname(__file__)

    # iterating over names and values in options dictionary
    for name, value in options.items():
        if value:
            choice = os.path.join(base_dir, name)
    # copy the boilerplate filetree to the project folder
    try:
        copy_filetree(choice, f"./{project_name}/")
    except Exception as e:
        error_exit(e)

    # output hell starts here
    click.echo(f'Success! Created app {project_name} in {os.getcwd()}' +
               f'/{project_name}')
    click.echo('Inside that directory you can run several commands:')
    click.echo(NEWLINE)

    # print commands and descriptions
    print_command('python run.py',
                  'Starts the server, default config is set to development.')
    if not helloworld:

        print_command('export secret_key=STRING',
                      'Sets the secret key for your app.')

        print_command(
            'export PRODUCTION=True',
            'Sets production config for your app. Setting it to False will set the development config.'
        )

        print_command(
            'source venv/bin/activate (unix) \n\t./venv/Scripts/activate  (windows)',
            'Activate the virtual environment for the app.')

        print_command(
            'pip install -r requirements.txt',
            'Install the packages listed in requirements.txt into the venv.')

        click.echo('We suggest that you start by typing:')
        click.echo(colored('\tcd ', 'cyan') + colored(project_name, 'white'))
        click.echo(
            colored(
                '\tsource venv/bin/activate' if not system() == 'Windows' else
                '\t./venv/Scripts/activate', 'cyan'))
        click.echo(
            colored('\tpip install -r ', 'cyan') +
            colored('requirements.txt', 'white'))
        click.echo(colored('\tpython run.py', 'cyan'))
    else:
        click.echo('We suggest that you start by typing:')
        click.echo(colored('\tcd ', 'cyan') + colored(project_name, 'white'))
        click.echo(colored('\tpip install flask ', 'cyan'))
        click.echo(colored('\tpython app.py', 'cyan'))

    click.echo(NEWLINE + 'Happy hacking!')
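A direct call might look as follows (hypothetical project name; with helloworld selected, no venv is created and only 'pip install flask' is suggested afterwards):

create_project('myapp', default=False, helloworld=True, api=False, spa=False)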
Example #45
0
def get_solver_config(config, prolog_file_info):

    # ----- LEVEL DIMENSIONS -----
    level_w = config['level_dimensions']['width']
    level_h = config['level_dimensions']['height']

    # ----- FORCE TILE TYPE (tiles at specified coords must be a certain type) -----
    forced_tiles = {}
    if config.get('force_tile_type') is not None:
        for tile_type, coord_strs in config['force_tile_type'].items():
            check_tile_type_exists_in_prolog(
                tile_type, prolog_file_info,
                'cannot force tile type (%s: %s)' % (tile_type, coord_strs))
            forced_tiles[tile_type] = eval(coord_strs)

    # ----- SOFT CONSTRAINTS -----
    soft_constraints = {
        "num_tile_ranges": False,
        "perc_tile_ranges": False,
        "perc_level_ranges": False
    }
    if config.get('soft_constraints') is not None:
        for constraint_key, constraint_value in config.get(
                'soft_constraints').items():
            soft_constraints[constraint_key] = eval(constraint_value)

    # ----- SPECIFY NUM TILE RANGES (for a certain type) -----
    num_tile_ranges = {}
    lo, hi = 0, level_w * level_h
    if config.get('num_tile_ranges') is not None:
        for tile_type, range_str in config['num_tile_ranges'].items():
            check_tile_type_exists_in_prolog(
                tile_type, prolog_file_info,
                'cannot force num tile range %s' % range_str)
            min_tiles, max_tiles = eval(range_str)
            num_tile_ranges[tile_type] = setup_tile_freq_range(
                tile_type, min_tiles, max_tiles, lo, hi)

        # Check if total min tiles > total tiles
        min_total = 0
        for tile_type, tile_range in num_tile_ranges.items():
            min_total += tile_range[0]
        if min_total > level_w * level_h:
            error_exit(
                "Sum of min tiles (%d) in specified num_tile_ranges cannot exceed the total number of tiles "
                "available in the generated level (%d)" %
                (min_total, level_w * level_h))

    # ----- SPECIFY PERCENT TILE RANGES (for a certain type) -----
    perc_tile_ranges = {}
    lo, hi = 0, 100
    for tile_type in METATILE_TYPES:
        perc_tile_ranges[tile_type] = (lo, hi)

    if config.get('perc_tile_ranges') is not None:
        for tile_type, range_str in config['perc_tile_ranges'].items():
            check_tile_type_exists_in_prolog(
                tile_type, prolog_file_info,
                'cannot force perc tile range %s' % range_str)
            min_perc_tiles, max_perc_tiles = eval(range_str)
            perc_tile_ranges[tile_type] = setup_tile_freq_range(
                tile_type, min_perc_tiles, max_perc_tiles, lo, hi)

        # Check if total min perc tiles > 100%
        min_perc_total = 0
        for tile_type, tile_range in perc_tile_ranges.items():
            min_perc_total += tile_range[0]
        if min_perc_total > 100:
            error_exit(
                "Sum of min perc tiles (%d) in specified perc_tile_ranges cannot exceed 100%%"
                % min_perc_total)

    # ----- SPECIFY PERCENT TILE RANGES (from a certain level) -----
    level_ids_map = prolog_file_info.get('level_ids_map')
    perc_level_ranges = {}
    lo, hi = 0, 100
    for level, ids in level_ids_map.items():
        perc_level_ranges[level] = (lo, hi)

    if config.get('perc_level_ranges') is not None:
        for level, range_str in config['perc_level_ranges'].items():
            if level_ids_map.get(level) is None:
                error_exit(
                    "The tileset does not contain tiles from level (%s) (specified in perc_level_"
                    "ranges). Valid levels are: %s" %
                    (level, str(list(level_ids_map.keys()))))
            min_perc_level, max_perc_level = eval(range_str)
            perc_level_ranges[level] = setup_tile_freq_range(
                level, min_perc_level, max_perc_level, lo, hi)

        # Check if total min perc levels > 100%
        min_perc_level_total = 0
        for level, tile_range in perc_level_ranges.items():
            min_perc_level_total += tile_range[0]
        if min_perc_level_total > 100:
            error_exit(
                "Sum of min perc tiles (%d) from each level specified in perc_level_ranges cannot exceed 100%%"
                % min_perc_level_total)

    # ----- SPECIFY START/GOAL POSITION RANGES -----
    tile_position_ranges = {
        'start_column': (0, level_w - 1),
        'start_row': (0, level_h - 1),
        'goal_column': (0, level_w - 1),
        'goal_row': (0, level_h - 1)
    }

    if config.get('tile_position_ranges') is not None:
        for position, range_str in config['tile_position_ranges'].items():
            if tile_position_ranges.get(position) is None:
                error_exit(
                    "%s tile position does not exist. Position must be one of %s"
                    % (position, str(list(tile_position_ranges.keys()))))
            level_max = level_w if 'column' in position else level_h
            min_index, max_index = eval(range_str)
            min_index, max_index = setup_tile_position_range(
                min_index, max_index, level_max)
            tile_position_ranges[position] = (min_index, max_index)

    # ----- SPECIFY IF START AND/OR GOAL TILE MUST BE ON GROUND -----
    require_start_on_ground = False
    require_goal_on_ground = False

    if config.get('require_start_on_ground') is not None:
        require_start_on_ground = eval(config['require_start_on_ground'])

    if config.get('require_goal_on_ground') is not None:
        require_goal_on_ground = eval(config['require_goal_on_ground'])

    # ----- SPECIFY RANGE NUMBER OF GAPS (PITS) ALLOWED -----
    lo, hi = 0, level_w
    num_gaps_range = (lo, hi)

    if config.get('num_gaps_range') is not None:
        min_gaps, max_gaps = eval(config['num_gaps_range'])
        min_gaps, max_gaps = setup_tile_freq_range('gap', min_gaps, max_gaps,
                                                   lo, hi)
        num_gaps_range = (min_gaps, max_gaps)

    # ----- SPECIFY IF ALL PLATFORM OR BONUS TILES MUST BE REACHABLE -----
    require_all_platforms_reachable = False
    require_all_bonus_tiles_reachable = False
    if config.get('require_all_platforms_reachable') is not None:
        require_all_platforms_reachable = eval(
            config['require_all_platforms_reachable'])
    if config.get('require_all_bonus_tiles_reachable') is not None:
        require_all_bonus_tiles_reachable = eval(
            config['require_all_bonus_tiles_reachable'])

    return {
        'level_w': level_w,  # int
        'level_h': level_h,  # int
        'forced_tiles': forced_tiles,  # {type: list-of-tile-coords}
        'soft_constraints':
        soft_constraints,  # {constraint_type: constraint_value}
        'num_tile_ranges': num_tile_ranges,  # { type: (min, max) }
        'perc_tile_ranges': perc_tile_ranges,  # { type: (min, max) }
        'perc_level_ranges': perc_level_ranges,  # { level: (min, max) }
        'tile_position_ranges':
        tile_position_ranges,  # { position: (min, max) }
        'require_start_on_ground': require_start_on_ground,  # bool
        'require_goal_on_ground': require_goal_on_ground,  # bool
        'num_gaps_range': num_gaps_range,  # (min, max)
        'require_all_platforms_reachable':
        require_all_platforms_reachable,  # bool
        'require_all_bonus_tiles_reachable':
        require_all_bonus_tiles_reachable  # bool
    }
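The range strings such as "(0, 10)" are parsed with eval(); for untrusted config files, ast.literal_eval is a safer drop-in for these literal tuples (a sketch):

import ast

min_tiles, max_tiles = ast.literal_eval("(0, 10)")  # rejects arbitrary code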
Example #46
0
def deploy_manager_sources():
    """Deploys all manager sources from a single archive.
    """
    archive_path = ctx.node.properties['manager_resources_package']
    archive_checksum_path = \
        ctx.node.properties['manager_resources_package_checksum_file']
    skip_checksum_validation = ctx.node.properties['skip_checksum_validation']
    if archive_path:
        sources_agents_path = os.path.join(utils.CLOUDIFY_SOURCES_PATH,
                                           'agents')
        agent_archives_path = utils.AGENT_ARCHIVES_PATH
        utils.mkdir(agent_archives_path)
        # this will leave this several-hundred-MB archive on the
        # manager. We should find a way to clean it up after all
        # operations have completed and bootstrap succeeded, as it is
        # no longer necessary
        utils.mkdir(RESOURCES_DIR)
        res_name = os.path.basename(archive_path)
        destination = os.path.join(RESOURCES_DIR, res_name)
        resources_archive_path = \
            utils.download_cloudify_resource(archive_path,
                                             NODE_NAME,
                                             destination=destination)
        # This would ideally go under utils.download_cloudify_resource but as
        # of now, we'll only be validating the manager resources package.

        if not skip_checksum_validation:
            skip_if_failed = False
            if not archive_checksum_path:
                skip_if_failed = True
                archive_checksum_path = archive_path + '.md5'
            md5_name = os.path.basename(archive_checksum_path)
            destination = os.path.join(RESOURCES_DIR, md5_name)
            resources_archive_md5_path = \
                utils.download_cloudify_resource(archive_checksum_path,
                                                 NODE_NAME,
                                                 destination=destination)
            if not utils.validate_md5_checksum(resources_archive_path,
                                               resources_archive_md5_path):
                if skip_if_failed:
                    ctx.logger.warn('Checksum validation failed. '
                                    'Continuing as no checksum file was '
                                    'explicitly provided.')
                else:
                    utils.error_exit(
                        'Failed to validate checksum for {0}'.format(
                            resources_archive_path))
            else:
                ctx.logger.info('Resources Package downloaded successfully...')
        else:
            ctx.logger.info(
                'Skipping resources package checksum validation...')

        utils.untar(resources_archive_path,
                    utils.CLOUDIFY_SOURCES_PATH,
                    skip_old_files=True)

        def splitext(filename):
            # not using os.path.splitext as it would return .gz instead of
            # .tar.gz
            if filename.endswith('.tar.gz'):
                return '.tar.gz'
            elif filename.endswith('.exe'):
                return '.exe'
            else:
                utils.error_exit(
                    'Unknown agent format for {0}. '
                    'Must be either tar.gz or exe'.format(filename))

        def normalize_agent_name(filename):
            # this returns the normalized name of an agent upon which our agent
            # installer retrieves agent packages for installation.
            # e.g. Ubuntu-trusty-agent_3.4.0-m3-b392.tar.gz returns
            # ubuntu-trusty-agent
            return filename.split('_', 1)[0].lower()

        for agent_file in os.listdir(sources_agents_path):

            agent_id = normalize_agent_name(agent_file)
            agent_extension = splitext(agent_file)
            utils.move(
                os.path.join(sources_agents_path, agent_file),
                os.path.join(agent_archives_path, agent_id + agent_extension))
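The two helpers behave as the comments describe; on the comment's own example filename (a quick check, assuming both functions are in scope):

assert normalize_agent_name('Ubuntu-trusty-agent_3.4.0-m3-b392.tar.gz') == \
    'ubuntu-trusty-agent'
assert splitext('Ubuntu-trusty-agent_3.4.0-m3-b392.tar.gz') == '.tar.gz'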
Example #47
0
def get_trueFIs(exp_res_filename,
                eval_res_filename,
                min_freq,
                delta,
                pvalue_mode,
                first_epsilon=1.0):
    """ Compute the True Frequent Itemsets using the 'holdout-VC' method with
    the binomial test

    TODO Add more details."""

    stats = dict()

    with open(exp_res_filename) as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            utils.error_exit(
                "Cannot compute size of the explore dataset: '{}' is not in the recognized format\n"
                .format(size_line))
        try:
            stats['exp_size'] = int(size_str)
        except ValueError:
            utils.error_exit(
                "Cannot compute size of the explore dataset: '{}' is not a number\n"
                .format(size_str))

    with open(eval_res_filename) as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            utils.error_exit(
                "Cannot compute size of the eval dataset: '{}' is not in the recognized format\n"
                .format(size_line))
        try:
            stats['eval_size'] = int(size_str)
        except ValueError:
            utils.error_exit(
                "Cannot compute size of the eval dataset: '{}' is not a number\n"
                .format(size_str))

    stats['orig_size'] = stats['exp_size'] + stats['eval_size']

    exp_res = utils.create_results(exp_res_filename, min_freq)
    stats['exp_res'] = len(exp_res)
    exp_res_set = set(exp_res.keys())
    eval_res = utils.create_results(eval_res_filename, min_freq)
    stats['eval_res'] = len(eval_res)
    eval_res_set = set(eval_res.keys())
    intersection = exp_res_set & eval_res_set
    stats['holdout_intersection'] = len(intersection)
    stats['holdout_false_negatives'] = len(exp_res_set - eval_res_set)
    stats['holdout_false_positives'] = len(eval_res_set - exp_res_set)
    stats['holdout_jaccard'] = len(intersection) / len(exp_res_set
                                                       | eval_res_set)

    # One may want to play with giving different values for the different error
    # probabilities, but there isn't really much point in it.
    stats['lowered_delta'] = 1.0 - math.sqrt(1 - delta)

    stats['filter_epsilon'] = first_epsilon

    sys.stderr.write("Computing candidates...")
    sys.stderr.flush()
    freq_bound = min_freq + stats['filter_epsilon']
    exp_res_filtered = set()
    exp_res_filtered_items = set()
    trueFIs = dict()
    for itemset in exp_res:
        if exp_res[itemset] < freq_bound:
            exp_res_filtered.add(itemset)
            exp_res_filtered_items |= itemset
        else:
            # Add itemsets with frequency at least freq_bound to the TFIs
            trueFIs[itemset] = exp_res[itemset]
    sys.stderr.write("done: {} exp_res_filtered ({} items)\n".format(
        len(exp_res_filtered), len(exp_res_filtered_items)))
    sys.stderr.flush()
    stats['tfis_from_exp'] = len(trueFIs)
    stats['exp_res_filtered'] = len(exp_res_filtered)

    supposed_freq = (math.ceil(stats['orig_size'] * min_freq) -
                     1) / stats['orig_size']
    if stats['exp_res_filtered'] > 0:
        eval_res = utils.create_results(eval_res_filename, min_freq)
        eval_res_set = set(eval_res.keys())
        stats['eval_res'] = len(eval_res)

        intersection = exp_res_filtered & eval_res_set
        stats['holdout_intersection'] = len(intersection)
        stats['holdout_false_negatives'] = len(exp_res_filtered - eval_res_set)

        # Bonferroni correction (Union bound). We work in the log space.
        stats['critical_value'] = math.log(stats['lowered_delta']) - math.log(
            stats['exp_res_filtered'])

        # Add TFIs from eval
        last_accepted_freq = 1.0
        last_non_accepted_freq = min_freq
        for itemset in sorted(intersection,
                              key=lambda x: eval_res[x],
                              reverse=True):
            p_value = utils.pvalue(pvalue_mode, eval_res[itemset],
                                   stats['eval_size'], supposed_freq)
            if p_value <= stats['critical_value']:
                trueFIs[itemset] = eval_res[itemset]
                last_accepted_freq = eval_res[itemset]
            else:
                last_non_accepted_freq = eval_res[itemset]
                break

        # Compute epsilon for the binomial
        min_diff = 5e-6  # controls when to stop the binary search
        while last_accepted_freq - last_non_accepted_freq > min_diff:
            mid_point = (last_accepted_freq - last_non_accepted_freq) / 2
            test_freq = last_non_accepted_freq + mid_point
            p_value = utils.pvalue(pvalue_mode, test_freq, stats['eval_size'],
                                   supposed_freq)
            if p_value <= stats['critical_value']:
                last_accepted_freq = test_freq
            else:
                last_non_accepted_freq = test_freq

        stats['epsilon'] = last_non_accepted_freq + (
            (last_accepted_freq - last_non_accepted_freq) / 2) - min_freq
        stats['removed'] = len(intersection) - len(trueFIs)
    else:  # stats['exp_res_filtered'] == 0
        stats['eval_res'] = 0
        stats['holdout_false_negatives'] = 0
        stats['holdout_intersection'] = 0
        stats['critical_value'] = 0
        stats['epsilon'] = 0
        stats['removed'] = 0

    return (trueFIs, stats)
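The lowered delta above is chosen so that (1 - ld)**2 = 1 - delta: two phases each succeeding with probability at least 1 - ld jointly succeed with probability at least 1 - delta (treating the two failure bounds independently). A quick numeric check:

import math

delta = 0.1
ld = 1.0 - math.sqrt(1.0 - delta)
assert abs(1.0 - (1.0 - ld) ** 2 - delta) < 1e-12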
Example #48
0
def main():
    if len(sys.argv) != 7:
        utils.error_exit(
            " ".join(
                ("USAGE: {}".format(os.path.basename(sys.argv[0])),
                 "use_additional_knowledge={{0|1}} delta min_freq gap dataset",
                 "results_filename\n")))
    dataset = sys.argv[5]
    res_filename = os.path.expanduser(sys.argv[6])
    if not os.path.isfile(res_filename):
        utils.error_exit(
            "{} does not exist, or is not a file\n".format(res_filename))
    try:
        use_additional_knowledge = int(sys.argv[1])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[1]))
    try:
        delta = float(sys.argv[2])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[2]))
    try:
        min_freq = float(sys.argv[3])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[3]))
    try:
        gap = float(sys.argv[4])
    except ValueError:
        utils.error_exit("{} is not a number\n".format(sys.argv[4]))

    ds_stats = getDatasetInfo.get_ds_stats(dataset)

    (trueFIs, stats) = get_trueFIs(ds_stats, res_filename, min_freq,
                                   delta, gap, use_additional_knowledge)

    utils.print_itemsets(trueFIs, ds_stats['size'])

    sys.stderr.write(
        ",".join(
            ("res_file={}".format(os.path.basename(res_filename)),
             "use_add_knowl={}".format(use_additional_knowledge),
             "e1={},e2={}".format(stats['epsilon_1'], stats['epsilon_2']),
             "d={}".format(delta),
             "min_freq={},trueFIs={}\n".format(min_freq, len(trueFIs)))))
    sys.stderr.write(
        ",".join(
            ("base_set={}".format(stats['base_set']),
             "maximal_itemsets={}".format(stats['maximal_itemsets']),
             "negbor={}".format(stats['negative_border']),
             "emp_vc_dim={}".format(stats['emp_vc_dim']),
             "not_emp_vc_dim={}\n".format(stats['not_emp_vc_dim']))))
    sys.stderr.write(
        ",".join(
            ("res_file,add_knowl,e1,e2,delta,min_freq,trueFIs",
             "base_set,maximal_itemsets,negative_border,emp_vc_dim",
             "not_emp_vc_dim\n")))
    sys.stderr.write("{}\n".format(
        ",".join((str(i) for i in (
            os.path.basename(res_filename), use_additional_knowledge,
            stats['epsilon_1'], stats['epsilon_2'], delta,
            min_freq, len(trueFIs), stats['base_set'],
            stats['maximal_itemsets'], stats['negative_border'],
            stats['emp_vc_dim'], stats['not_emp_vc_dim'])))))
def get_trueFIs(exp_res_filename, eval_res_filename, min_freq, delta,
                pvalue_mode, first_epsilon=1.0):
    """ Compute the True Frequent Itemsets using the 'holdout-VC' method with
    the binomial test.

    exp_res_filename and eval_res_filename point to the mining results on the
    explore and evaluation parts of the holdout split, min_freq is the
    frequency threshold, delta is the acceptable error probability,
    pvalue_mode is passed through to utils.pvalue, and first_epsilon is the
    initial filtering epsilon.

    TODO Add more details."""

    stats = dict()

    with open(exp_res_filename) as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            utils.error_exit(
                "Cannot compute size of the explore dataset: '{}' is not "
                "in the recognized format\n".format(size_line))
        try:
            stats['exp_size'] = int(size_str)
        except ValueError:
            utils.error_exit(
                "Cannot compute size of the explore dataset: '{}' is not "
                "a number\n".format(size_str))

    with open(eval_res_filename) as FILE:
        size_line = FILE.readline()
        try:
            size_str = size_line.split("(")[1].split(")")[0]
        except IndexError:
            utils.error_exit(
                "Cannot compute size of the eval dataset: '{}' is not in "
                "the recognized format\n".format(size_line))
        try:
            stats['eval_size'] = int(size_str)
        except ValueError:
            utils.error_exit(
                "Cannot compute size of the eval dataset: '{}' is not "
                "a number\n".format(size_str))

    stats['orig_size'] = stats['exp_size'] + stats['eval_size']

    exp_res = utils.create_results(exp_res_filename, min_freq)
    stats['exp_res'] = len(exp_res)
    exp_res_set = set(exp_res.keys())
    eval_res = utils.create_results(eval_res_filename, min_freq)
    stats['eval_res'] = len(eval_res)
    eval_res_set = set(eval_res.keys())
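    # Holdout diagnostics: how closely the explore and eval results agree.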
    intersection = exp_res_set & eval_res_set
    stats['holdout_intersection'] = len(intersection)
    stats['holdout_false_negatives'] = len(exp_res_set - eval_res_set)
    stats['holdout_false_positives'] = len(eval_res_set - exp_res_set)
    stats['holdout_jaccard'] = (
        len(intersection) / len(exp_res_set | eval_res_set))

    # One may want to play with giving different values for the different error
    # probabilities, but there isn't really much point in it.
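    # Here each of the two phases gets delta' = 1 - sqrt(1 - delta), so that
    # the probability that both succeed is (1 - delta')^2 = 1 - delta.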
    stats['lowered_delta'] = 1.0 - math.sqrt(1 - delta)

    stats['filter_epsilon'] = first_epsilon

    sys.stderr.write("Computing candidates...")
    sys.stderr.flush()
    freq_bound = min_freq + stats['filter_epsilon']
    exp_res_filtered = set()
    exp_res_filtered_items = set()
    trueFIs = dict()
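    # Itemsets whose explore frequency is at least min_freq + filter_epsilon
    # are accepted outright; the ones below that bound are kept as candidates
    # to be re-tested on the eval part.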
    for itemset in exp_res:
        if exp_res[itemset] < freq_bound:
            exp_res_filtered.add(itemset)
            exp_res_filtered_items |= itemset
        else:
            # Add itemsets with frequency at least freq_bound to the TFIs
            trueFIs[itemset] = exp_res[itemset]
    sys.stderr.write("done: {} exp_res_filtered ({} items)\n".format(
        len(exp_res_filtered), len(exp_res_filtered_items)))
    sys.stderr.flush()
    stats['tfis_from_exp'] = len(trueFIs)
    stats['exp_res_filtered'] = len(exp_res_filtered)

    # Null-hypothesis frequency for the binomial test: the largest frequency
    # on the original dataset that is still strictly below min_freq.
    supposed_freq = (math.ceil(
        stats['orig_size'] * min_freq) - 1) / stats['orig_size']
    if stats['exp_res_filtered'] > 0:
        eval_res = utils.create_results(eval_res_filename, min_freq)
        eval_res_set = set(eval_res.keys())
        stats['eval_res'] = len(eval_res)

        intersection = exp_res_filtered & eval_res_set
        stats['holdout_intersection'] = len(intersection)
        stats['holdout_false_negatives'] = len(exp_res_filtered - eval_res_set)

        # Bonferroni correction (Union bound). We work in the log space.
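        # Each of the exp_res_filtered tests runs at level
        # lowered_delta / exp_res_filtered, so the log p-value threshold is
        # log(lowered_delta) - log(exp_res_filtered).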
        stats['critical_value'] = math.log(stats['lowered_delta']) - math.log(stats['exp_res_filtered'])

        # Add TFIs from eval
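        # Scan the candidates by decreasing eval frequency: lower frequencies
        # can only give larger p-values, so we can stop at the first
        # rejection.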
        last_accepted_freq = 1.0
        last_non_accepted_freq = min_freq
        for itemset in sorted(intersection,
                              key=lambda x: eval_res[x],
                              reverse=True):
            p_value = utils.pvalue(pvalue_mode, eval_res[itemset],
                                   stats['eval_size'], supposed_freq)
            if p_value <= stats['critical_value']:
                trueFIs[itemset] = eval_res[itemset]
                last_accepted_freq = eval_res[itemset]
            else:
                last_non_accepted_freq = eval_res[itemset]
                break

        # Compute epsilon for the binomial
        min_diff = 5e-6  # controls when to stop the binary search
        while last_accepted_freq - last_non_accepted_freq > min_diff:
            mid_point = (last_accepted_freq - last_non_accepted_freq) / 2
            test_freq = last_non_accepted_freq + mid_point
            p_value = utils.pvalue(pvalue_mode, test_freq, stats['eval_size'],
                                   supposed_freq)
            if p_value <= stats['critical_value']:
                last_accepted_freq = test_freq
            else:
                last_non_accepted_freq = test_freq

        stats['epsilon'] = last_non_accepted_freq + (
            (last_accepted_freq - last_non_accepted_freq) / 2) - min_freq
        stats['removed'] = len(intersection) - len(trueFIs)
    else: # stats['exp_res_filtered'] == 0
        stats['eval_res'] = 0
        stats['holdout_false_negatives'] = 0
        stats['holdout_intersection'] = 0
        stats['critical_value'] = 0
        stats['epsilon'] = 0
        stats['removed'] = 0

    return (trueFIs, stats)
Example #50
0
def main(game, levels, process, solve, trial, max_sol, threads):

    if process:
        print("----- PROCESSING -----")
        process_dir = utils.get_directory("process_console_output")

        for level in levels:
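            # Process each level with pypy3, then generate its Prolog encoding
            # with CPython; timings and all output are captured in the log.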
            process_file = utils.get_filepath(process_dir, "%s.txt" % level)
            os.system(
                "(time pypy3 main.py platformer %s %s --process) > %s 2>&1" %
                (game, level, process_file))
            os.system(
                "(time python main.py platformer %s %s --gen_prolog) >> %s 2>&1"
                % (game, level, process_file))
            print("Saved to: %s" % process_file)

    if solve:
        print("----- SOLVING -----")
        config_formats = TRIAL_CONFIG_FORMATS.get(trial)
        if config_formats is None:
            utils.error_exit("--trial must be one of %s" %
                             str(list(TRIAL_CONFIG_FORMATS.keys())))

        # Path templates for the solver inputs and generated artifacts; each
        # is completed with a level name or an answer-set filename.
        prolog_file_format = "level_saved_files_block/prolog_files/%s.pl"
        level_structural_txt_file_format = "level_structural_layers/generated/%s.txt"
        level_model_str_file_format = "level_saved_files_block/generated_level_model_strs/%s.txt"
        level_assignments_dict_file_format = "level_saved_files_block/generated_level_assignments_dicts/%s.pickle"
        level_valid_path_file_format = "level_saved_files_block/generated_level_paths/%s.pickle"
        level_state_graph_file_format = "level_saved_files_block/enumerated_state_graphs/generated/%s.gpickle"

        solve_dir = utils.get_directory("solver_console_output")
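        # Solve for answer-set indices from the highest down to 0.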
        sol_order = list(range(max_sol))
        sol_order.reverse()

        for sol in sol_order:
            for config_file_format in config_formats:
                for level in levels:
                    prolog_file = prolog_file_format % level
                    prolog_filename = utils.get_basepath_filename(
                        prolog_file, 'pl')
                    config_file = config_file_format % (game, level)
                    config_filename = utils.get_basepath_filename(
                        config_file, 'json')

                    answer_set_filename_format = '_'.join(
                        [prolog_filename, config_filename, 'a%d'])
                    cur_answer_set_filename = answer_set_filename_format % sol
                    default_answer_set_filename = answer_set_filename_format % 0

                    solve_file = utils.get_filepath(
                        "%s/%s/" % (solve_dir, level),
                        "%s.txt" % cur_answer_set_filename)

                    os.system(
                        "(time python run_solver.py %s %s --max_sol 1 --threads %d --save --validate) > %s 2>&1"
                        % (prolog_file, config_file, threads, solve_file))
                    print("Saved to: %s" % solve_file)

                    # The solver writes its artifacts under the default (a0)
                    # answer-set name, so for sol != 0 rename each artifact
                    # that exists to the current solution index.
                    if sol != 0:
                        for file_format in (
                                level_structural_txt_file_format,
                                level_assignments_dict_file_format,
                                level_model_str_file_format,
                                level_valid_path_file_format,
                                level_state_graph_file_format):
                            default_file = (
                                file_format % default_answer_set_filename)
                            if os.path.exists(default_file):
                                os.system("mv %s %s" % (
                                    default_file,
                                    file_format % cur_answer_set_filename))

                    if os.path.exists(level_structural_txt_file_format %
                                      cur_answer_set_filename):
                        print("Level txt path: %s" %
                              (level_structural_txt_file_format %
                               cur_answer_set_filename))