Example #1
def add_position_to_node():
    nodes = common.read_json(OUTPUT_DIR, NODE_FILE_NAME)
    positions = common.read_json(OUTPUT_DIR, POSITION_FILE_NAME)
    for node in nodes:
        position = positions[str(node['id'])]
        node['x'] = position['x']
        node['y'] = position['y']
    common.write_json(nodes, OUTPUT_DIR, NODE_FILE_NAME)
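Every example on this page assumes a small common module for JSON I/O whose exact signatures vary from project to project. A minimal sketch matching this example's (directory, filename) calling convention, offered only as a guess at what common provides:

import json
import os

def read_json(dirname, filename):
    # Load <dirname>/<filename> and return the parsed JSON value.
    with open(os.path.join(dirname, filename)) as f:
        return json.load(f)

def write_json(data, dirname, filename):
    # Serialize data to <dirname>/<filename>, mirroring the argument order
    # common.write_json uses in this example.
    with open(os.path.join(dirname, filename), 'w') as f:
        json.dump(data, f, indent=2)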
Example #2
def add_value_to_node():
    nodes = common.read_json(OUTPUT_DIR, NODE_FILE_NAME)
    edges = common.read_json(OUTPUT_DIR, EDGE_FILE_NAME)
    for node in nodes:
        count = len(list(filter(lambda x: x['from'] == node['id'], edges)))
        count += len(list(filter(lambda x: x['to'] == node['id'], edges)))
        node['value'] = count
    common.write_json(nodes, OUTPUT_DIR, NODE_FILE_NAME)
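The two filter passes above rescan the full edge list for every node. Under the same assumptions as the example (the common helpers and file-name constants), the degree count can also be built in a single pass over the edges; a sketch:

from collections import Counter

def add_value_to_node():
    nodes = common.read_json(OUTPUT_DIR, NODE_FILE_NAME)
    edges = common.read_json(OUTPUT_DIR, EDGE_FILE_NAME)
    # Tally both endpoints of every edge once instead of filtering per node.
    degree = Counter()
    for edge in edges:
        degree[edge['from']] += 1
        degree[edge['to']] += 1
    for node in nodes:
        node['value'] = degree[node['id']]
    common.write_json(nodes, OUTPUT_DIR, NODE_FILE_NAME)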
Example #3
def convert_to_rdf():
    """
    Converts the read data to triples
    """

    print ""
    print "Convert to RDF..."

    movies = common.read_json(JSON_OUT_FILE)

    g = Graph()
    g.bind("", NS_OMDB)
    g.bind("dbpedia-owl", NS_DBPEDIA_OWL)
    g.bind("dbpprop", NS_DBPPROP)

    for m in movies:
        movie = URIRef(BASE_URI % common.encodeString(m["title"]))
        g.add((movie, RDF.type, NS_DBPEDIA_OWL.Film))
        g.add((movie, RDFS.label, Literal(m["title"])))
        g.add((movie, NS_DBPPROP.title, Literal(m["title"])))

        if "imdbID" in m:
            g.add((movie, NS_DBPEDIA_OWL.imdbId, Literal(m["imdbID"])))

    common.write_rdf(RDF_OUT_FILE, g)
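The rdflib imports and namespace constants this example relies on are not shown. A hedged guess at what they might look like: the two DBpedia namespaces are the standard ones, while NS_OMDB, BASE_URI and the file names are project-specific placeholders.

from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS

NS_DBPEDIA_OWL = Namespace("http://dbpedia.org/ontology/")
NS_DBPPROP = Namespace("http://dbpedia.org/property/")
NS_OMDB = Namespace("http://example.org/omdb/")  # hypothetical
BASE_URI = "http://example.org/omdb/%s"          # hypothetical
JSON_OUT_FILE = "omdb.json"                      # hypothetical
RDF_OUT_FILE = "omdb.ttl"                        # hypothetical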
Example #4
def analyze_experiment(info, experiments_dir, tmp_data_dir,
                       date_str, exp_name):
    exp_dir = os.path.join(experiments_dir, exp_name)

    exp_data_dir = os.path.join(tmp_data_dir, exp_name)
    tmp_analysis_dir = os.path.join(exp_data_dir, 'analysis')
    idemp_mkdir(tmp_analysis_dir)

    analyzed_data_dir = info.exp_data_dir(exp_name)
    if not os.path.exists(analyzed_data_dir):
        idemp_mkdir(analyzed_data_dir)

    subprocess.call([os.path.join(exp_dir, 'analyze.sh'),
                     info.exp_config_dir(exp_name), exp_data_dir, tmp_analysis_dir],
                    cwd=exp_dir)

    status = validate_status(tmp_analysis_dir)

    # read the analyzed data, append a timestamp field, and copy over to the permanent data dir
    if status['success']:
        data_exists = check_file_exists(tmp_analysis_dir, 'data.json')
        if not data_exists:
            status = {'success': False, 'message': 'No data.json file produced by {}'.format(exp_name)}
        else:
            # collect data to dump to data_*.json
            dump_data = {
                'timestamp'  : date_str,
            }
            dump_data.update(read_json(tmp_analysis_dir, 'data.json'))
            # fetch time spent on the experiment
            dump_data.update(get_timing_info(info, exp_name))
            write_json(analyzed_data_dir, 'data_{}.json'.format(date_str), dump_data)
    
    info.report_exp_status(exp_name, 'analysis', status)
    return status['success']
Example #5
def convert_to_rdf():
    """
    Converts the read data to triples
    """

    print ""
    print "Convert to RDF..."

    songs = common.read_json(JSON_OUT_FILE)

    g = Graph()
    g.bind("", NS_LASTFM)
    g.bind("dbpedia-owl", NS_DBPEDIA_OWL)
    g.bind("dbpprop", NS_DBPPROP)

    for s in songs:
        if "tags" not in s or len(s["tags"]) < 1:
            continue

        artist = URIRef(BASE_URI % common.encodeString(s["artist"]))
        g.add((artist, RDF.type, NS_DBPEDIA_OWL.MusicalArtist))
        g.add((artist, RDFS.label, Literal(s["artist"])))
        g.add((artist, NS_DBPPROP.name, Literal(s["artist"])))

        song = URIRef(BASE_URI % common.encodeString(u"{0:s} - {1:s}".format(s['artist'],  s["title"])))
        g.add((song, RDF.type, NS_DBPEDIA_OWL.Song))
        g.add((song, RDFS.label, Literal(u"{0:s} - {1:s}".format(s['artist'], s["title"]))))
        g.add((song, NS_DBPPROP.title, Literal(s["title"])))
        g.add((song, NS_DBPEDIA_OWL.artist, artist))

        for t in s["tags"]:
            g.add((song, NS_LASTFM.tagged, Literal(t)))

    common.write_rdf(RDF_OUT_FILE, g)
Example #6
def check_config():
    try:
        config = common.read_json("doc/config.json")
        config_error = 0
    except:
        config_error = 1
    return config_error
Example #7
def run_baseline(model, exp_config, config, config_dir, output_dir):
    '''
    Run a baseline trial and obtain its memory usage.
    This is used to get a reference memory usage for
    DTR `ratio` commands.
    '''
    baseline_config = {'batch_size': exp_config['batch_size']}
    if 'extra_params' in exp_config:
        baseline_config['extra_params'] = exp_config['extra_params']
    filename = str(time.time()) + '.json'
    temp_file = prepare_out_file(os.getcwd(), filename)
    success, msg = run_trials(config_dir,
                              python_command('baseline', config),
                              'baseline',
                              model,
                              baseline_config,
                              config['n_inputs'],
                              config['n_reps'],
                              output_dir,
                              report_errors=config['report_errors'],
                              append_to_csv=False,
                              trial_run=True,
                              trial_run_outfile=temp_file)
    if not success:
        return False, 'Error while running baseline trial: \n{}'.format(msg)

    mem_usage = read_json(output_dir, temp_file)
    os.remove(temp_file)
    if 'mem' not in mem_usage:
        return False, 'failed to get baseline memory usage'
    return True, mem_usage['mem']
Example #8
def main(argv):
    args = parse_arguments(argv)

    if args.save_as is None:
        out_dir = os.path.join(args.out_dir, base64_encode(args.keyword))
    else:
        out_dir = os.path.join(args.out_dir, base64_encode(args.save_as))
    downloader = NiconicoDownloader(out_dir)

    auth = read_json(args.auth_json)
    auth = dict() if auth is None else auth
    while True:
        if 'niconico' not in auth:
            auth['niconico'] = {
                'username': input('Username >> '),
                'password': getpass('Password >> '),
            }

        try:
            downloader.authenticate(**auth['niconico'])
            write_json(args.auth_json, auth)
            break
        except NoSuchElementException:
            logger.error('Failed to login.')
            del auth['niconico']

    downloader(args.keyword)
Example #9
def init_vta_env(target):
    """Read the VTA config and set the target to `target`."""
    config_dir = os.path.join(os.environ['TVM_HOME'], 'vta', 'config')
    config_filename = 'vta_config.json'
    vta_config = read_json(config_dir, config_filename)
    vta_config["TARGET"] = target
    return vta.Environment(vta_config)
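A hypothetical call, assuming 'sim' is among the targets that the bundled vta_config.json accepts:

# Build a VTA environment that targets the functional simulator.
env = init_vta_env('sim')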
Example #10
def run_baseline(model, exp_config, config, config_dir, output_dir):
    '''
    Run a baseline trial and obtain its memory usage.
    This is used to get a reference memory usage for
    DTR `ratio` commands.
    '''
    baseline_config = { 'batch_size' : exp_config['batch_size'],
                        'timeout': exp_config.get('timeout', 60),
                        # only doing a minimal number of runs because we are only getting the memory usage,
                        # which should be identical between runs
                        'n_reps': 10,
                        'extra_params': exp_config.get('extra_params', {})
    }
    if 'input_params' in exp_config:
        baseline_config['input_params'] = exp_config['input_params']
    filename = str(time.time()) + '.json'
    temp_file = prepare_out_file(os.getcwd(), filename)
    success, msg = run_trials(config_dir,
                              python_command('baseline', config),
                              'baseline', model, baseline_config,
                              exp_config.get('n_inputs', config['n_inputs']),
                              output_dir,
                              report_errors=config['report_errors'],
                              append_to_csv=False,
                              trial_run=True,
                              trial_run_outfile=temp_file,
                              sync_gpu=config['sync_gpu'])
    if not success:
        return False, 'Error while running baseline trial: \n{}'.format(msg)

    mem_usage = read_json(output_dir, temp_file)
    os.remove(temp_file)
    if 'mem' not in mem_usage:
        return False, 'failed to get baseline memory usage'
    return True, mem_usage['mem']
Example #11
def check_confidentials():
    try:
        confidentials = common.read_json("confidentials.json")
        confidentials_error = 0
    except:
        confidentials_error = 1
    return confidentials_error
Example #12
def read_first_existing(*files):
    for f in files:
        if os.path.exists(f):
            try:
                return c.read_json(f)
            except ValueError:
                pass
    raise IOError("Cannot find any of " + ", ".join(files))
Example #13
def read_first_existing(*files):
    for f in files:
        if os.path.exists(f):
            try:
                return c.read_json(f)
            except ValueError:
                pass
    raise IOError("Cannot find any of " + ", ".join(files))
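A typical call, with hypothetical file names: the first file that exists and parses as JSON wins, and IOError is raised if none of them do.

config = read_first_existing("config.local.json", "config.json")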
Example #14
def main(home_dir, experiments_dir, subsystem_dir, telemetry_script_dir):
    """
    Home directory: where config info for experiments, etc., lives
    Experiments directory: where experiment implementations live
    Both should be given as absolute paths
    """
    time_str = get_timestamp()

    if not check_file_exists(home_dir, 'config.json'):
        print('Dashboard config (config.json) is missing in {}'.format(home_dir))
        return 1
    dash_config = read_json(home_dir, 'config.json')

    # must expand all tildes in the config to avoid future errors
    for path_field in ['tmp_data_dir', 'setup_dir', 'backup_dir']:
        dash_config[path_field] = os.path.expanduser(dash_config[path_field])

    tmp_data_dir = os.path.join(dash_config['tmp_data_dir'], 'benchmarks_' + time_str)
    data_archive = os.path.join(dash_config['tmp_data_dir'], 'benchmarks_' + time_str + '_data.tar.gz')
    setup_dir = dash_config['setup_dir']
    backup_archive = os.path.join(dash_config['backup_dir'], 'dashboard_' + time_str + '.tar.gz')
    idemp_mkdir(tmp_data_dir)
    idemp_mkdir(os.path.dirname(backup_archive))
    idemp_mkdir(setup_dir)

    info = DashboardInfo(home_dir)

    # make a backup of the previous dashboard files if they exist
    if os.path.exists(home_dir):
        subprocess.call(['tar', '-zcf', backup_archive, home_dir])

    # directories whose contents should not change between runs of the dashboard
    persistent_dirs = {info.exp_data,
                       info.exp_configs,
                       info.subsys_configs,
                       info.subsys_output}
    all_dashboard_dirs = info.all_experiment_dirs() + info.all_subsystem_dirs()

    # instantiate necessary dashboard dirs and clean any that should be empty
    for dashboard_dir in all_dashboard_dirs:
        if dashboard_dir not in persistent_dirs:
            subprocess.call(['rm', '-rf', dashboard_dir])
        idemp_mkdir(dashboard_dir)

    randomize_exps = True
    if 'randomize' in dash_config:
        randomize_exps = dash_config['randomize']

    telemetry_rate = dash_config.get('telemetry_rate', 15)
    run_cpu_telemetry = dash_config.get('run_cpu_telemetry', False)
    run_gpu_telemetry = dash_config.get('run_gpu_telemetry', False)
    run_all_experiments(info, experiments_dir, setup_dir,
                        tmp_data_dir, data_archive,
                        time_str, telemetry_script_dir, run_cpu_telemetry=run_cpu_telemetry, run_gpu_telemetry=run_gpu_telemetry,
                        telemetry_interval=telemetry_rate, randomize=randomize_exps)

    run_all_subsystems(info, subsystem_dir, time_str)
Example #15
    def _eval(self, gt_fname, rec_fname):
        gt_playlists = read_json(gt_fname)
        gt_dict = {g["id"]: g for g in gt_playlists}
        rec_playlists = read_json(rec_fname)

        gt_ids = set([g["id"] for g in gt_playlists])
        rec_ids = set([r["id"] for r in rec_playlists])

        if gt_ids != rec_ids:
            raise Exception("결과의 플레이리스트 수가 올바르지 않습니다.")

        rec_song_counts = [len(p["songs"]) for p in rec_playlists]
        rec_tag_counts = [len(p["tags"]) for p in rec_playlists]

        if set(rec_song_counts) != set([100]):
            raise Exception("추천 곡 결과의 개수가 맞지 않습니다.")

        if set(rec_tag_counts) != set([10]):
            raise Exception("추천 태그 결과의 개수가 맞지 않습니다.")

        rec_unique_song_counts = [len(set(p["songs"])) for p in rec_playlists]
        rec_unique_tag_counts = [len(set(p["tags"])) for p in rec_playlists]

        if set(rec_unique_song_counts) != set([100]):
            raise Exception("한 플레이리스트에 중복된 곡 추천은 허용되지 않습니다.")

        if set(rec_unique_tag_counts) != set([10]):
            raise Exception("한 플레이리스트에 중복된 태그 추천은 허용되지 않습니다.")

        music_ndcg = 0.0
        tag_ndcg = 0.0

        for rec in rec_playlists:
            gt = gt_dict[rec["id"]]
            music_ndcg += self._ndcg(gt["songs"], rec["songs"][:100])
            tag_ndcg += self._ndcg(gt["tags"], rec["tags"][:10])

        music_ndcg = music_ndcg / len(rec_playlists)
        tag_ndcg = tag_ndcg / len(rec_playlists)
        score = music_ndcg * 0.85 + tag_ndcg * 0.15

        return music_ndcg, tag_ndcg, score
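The _ndcg helper is assumed rather than shown. A minimal sketch of what such a method might look like, using binary relevance and log2 discounting (an assumption, not necessarily the evaluator's actual implementation):

import math

def _ndcg(self, gt, rec):
    # DCG over the recommended items that appear in the ground-truth list.
    dcg = sum(1.0 / math.log2(i + 2) for i, item in enumerate(rec) if item in gt)
    # Ideal DCG: every slot up to the ground-truth size is a hit.
    idcg = sum(1.0 / math.log2(i + 2) for i in range(min(len(gt), len(rec))))
    return dcg / idcg if idcg > 0 else 0.0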
Example #16
def summary_valid(exp_summary_dir):
    """
    Checks that the experiment summary directory contains a summary.json
    file and that the summary.json file contains the required fields, title
    and value.
    """
    exists = check_file_exists(exp_summary_dir, 'summary.json')
    if not exists:
        return False
    summary = read_json(exp_summary_dir, 'summary.json')
    return 'title' in summary and 'value' in summary
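For reference, a minimal summary.json this check would accept (the values are made up):

{"title": "Mean inference latency (ms)", "value": 3.2}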
Example #17
async def message(session: CommandSession):
    user_id = session.event['user_id']

    if user_id in config.SUPERUSERS:
        info = common.read_json('animal_crossing/data/pid.json', False)
        if info is not False:
            fo = open('animal_crossing/data/pid.json', "w")
            info['rebot'] = True
            fo.write(json.dumps(info))
            fo.flush()
            fo.close()
Example #18
def main(data_dir, config_dir, output_dir):
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    # No further analysis is required beyond the raw stats reported by the VTA
    # simulator, so we just propagate the data to the next stage of the
    # pipeline.
    data = read_json(data_dir, 'data.json')
    write_json(output_dir, 'data.json', data)
    write_status(output_dir, True, 'success')
Example #19
def make_topology():
    entries = common.read_json(OUTPUT_DIR, ENTRIES_FILE_NAME)

    nodes = []
    edges = []

    for entry in entries:
        nodes.append(make_node(entry))
        edges += make_edges(entry, entries)

    common.write_json(nodes, OUTPUT_DIR, NODE_FILE_NAME)
    common.write_json(edges, OUTPUT_DIR, EDGE_FILE_NAME)
Example #20
def run(args):
    domain = args.domain
    if not domain:
        print('usage: wydomain.py -d aliyun.com')
        sys.exit(1)

    outfile = '{0}.log'.format(domain)

    script_path = os.path.dirname(os.path.abspath(__file__))
    _cache_path = os.path.join(script_path, 'result/{0}'.format(domain))
    if not os.path.exists(_cache_path):
        os.makedirs(_cache_path, 0o777)

    # start crt
    print('[*]Starting Crt fetch ...')
    result = Crt(domain=domain).run()
    _cache_file = os.path.join(_cache_path, 'crt.json')
    save_result(_cache_file, result)
    print('\t[-]Fetch complete | Found {}'.format(len(result)))

    # start ilink
    print('[*]Starting iLink fetch ...')
    result = Ilink(domain=domain).run()
    _cache_file = os.path.join(_cache_path, 'ilink.json')
    save_result(_cache_file, result)
    print('\t[-]Fetch complete | Found {}'.format(len(result)))

    # new start brute
    print('[*]Starting Brute sub ...')
    result = BruteDns(domain=domain).run()
    _cache_file = os.path.join(_cache_path, 'brute.json')
    save_result(_cache_file, result)
    print('\n\t[-]Bruteforce complete | Found {}'.format(len(result)))
    #

    _cache_files = ['crt.json', 'ilink.json', 'brute.json']

    subdomains = []

    for file in _cache_files:
        _cache_file = os.path.join(_cache_path, file)
        json_data = read_json(_cache_file)
        if json_data:
            subdomains.extend(json_data)

    subdomains = list(set(subdomains))

    _result_file = os.path.join(script_path, outfile)
    save_result(_result_file, subdomains)

    print('[*]{0} {1} subdomains save to {2}'.format(domain, len(subdomains),
                                                      _result_file))
Example #21
def main(argv):
    args = parse_arguments(argv)

    dirname = os.path.basename(args.input_dir)
    valid_dir = os.path.join(args.output_dir, dirname, 'valid')
    invalid_dir = os.path.join(args.output_dir, dirname, 'invalid')
    os.makedirs(valid_dir, exist_ok=True)
    os.makedirs(invalid_dir, exist_ok=True)
    removed_json = os.path.join(args.output_dir, dirname, '.cache.json')

    names = get_filenames(args.input_dir)
    valid_names = get_filenames(valid_dir)
    invalid_names = get_filenames(invalid_dir)
    removed_names = read_json(removed_json)
    removed_names = [] if removed_names is None else removed_names
    names = sorted(
        set(names) - set(valid_names) - set(invalid_names) -
        set(removed_names))

    # Instruction
    sys.stdout.write('Key input instructions:\n'
                     'j: Accept current image\n'
                     'k: Reject current image\n'
                     'u: Undo recent validation\n'
                     'd: Exclude image \n'
                     'q: Quit validation\n')

    i = 0
    while i < len(names):
        path = os.path.join(args.input_dir, names[i])
        key = show_image(path, args.size)

        if key == KeyStatus.UNDO and i > 1:
            i -= 1
            if os.path.exists(os.path.join(valid_dir, names[i])):
                os.remove(os.path.join(valid_dir, names[i]))
            elif os.path.exists(os.path.join(invalid_dir, names[i])):
                os.remove(os.path.join(invalid_dir, names[i]))
            else:
                removed_names.pop()
        elif key == KeyStatus.OK:
            shutil.copyfile(path, os.path.join(valid_dir, names[i]))
            i += 1
        elif key == KeyStatus.FAIL:
            shutil.copyfile(path, os.path.join(invalid_dir, names[i]))
            i += 1
        elif key == KeyStatus.REMOVE:
            removed_names.append(names[i])
            write_json(removed_json, removed_names)
            i += 1
        else:
            exit()
Example #22
def main(config_dir,
         experiment_mode,
         model_name,
         input_idx,
         params_file,
         out_file,
         trial_run=False,
         trial_run_outfile=None):
    if 'DTR_MODEL_NAME' in os.environ:
        model_name = os.environ['DTR_MODEL_NAME']
    config, msg = validate_trials_config(config_dir)
    if config is None:
        print(msg)
        return 1

    use_dtr = (experiment_mode == 'dtr')

    i = int(input_idx)
    is_trial = trial_run == 'True'

    if config['set_seed']:
        torch.manual_seed(config['seed'] + i)
        random.seed(config['seed'] + i)

    cwd = os.getcwd()

    # handle specific params, esp. for DTR
    specific_params = read_json(cwd, params_file)
    if 'DTR_MEMORY_BUDGET' in os.environ:
        specific_params['memory_budget'] = float(
            os.environ['DTR_MEMORY_BUDGET'])

    assert 'batch_size' in specific_params
    if use_dtr:
        assert 'memory_budget' in specific_params
        if specific_params['memory_budget'] > 0:
            print(f'Setting budget to {int(specific_params["memory_budget"])}')
            torch.set_memory_budget(int(specific_params['memory_budget']))
    if is_trial:
        timing_loop(model_name, i, config, use_dtr, specific_params, None,
                    True, trial_run_outfile)
        return

    with open(out_file, 'a', newline='') as csvfile:
        writer = create_csv_writer(csvfile, specific_params)
        timing_loop(model_name,
                    i,
                    config,
                    use_dtr,
                    specific_params,
                    writer,
                    memory_budget=specific_params.get('memory_budget', -1))
Example #23
def search(args):
    hits = common.read_json(common.read_url("http://ajax.googleapis.com/ajax/services/search/web?v=1.0&safe=off&q=", args))['responseData']['results']

    if hits:
        striphtml = lambda s: re.sub(r'<.+?>', '', re.sub(r'(  +|\n)', '', s))
        url = striphtml(hits[0]['unescapedUrl'])
        title = striphtml(hits[0]['titleNoFormatting'])
        content = striphtml(hits[0]['content'])
        result = "{1}: {2} -- {0}".format(url, title, content)
    else:
        result = "No hits."

    return result
Example #24
def attempt_parse_config(config_dir, target):
    """
    Returns the parsed config for the target (experiment or subsystem) if it exists.
    Returns None if the config is missing or could not be parsed.
    """
    conf_subdir = os.path.join(config_dir, target)
    if not check_file_exists(conf_subdir, 'config.json'):
        return None

    try:
        return read_json(conf_subdir, 'config.json')
    except Exception as e:
        return None
Example #25
def pp_search(args, url_re):
    """
    Search Google for a Profound Programmer page matching the args.
    Return the URL.
    """
    searchterms = '{} site:theprofoundprogrammer.com/post/'.format(args)
    hits = common.read_json(common.read_url("http://ajax.googleapis.com/ajax/services/search/web?v=1.0&safe=off&q=", searchterms))['responseData']['results']
    
    if not hits:
        return None

    striphtml = lambda s: re.sub(r'<.+?>', '', re.sub(r'  +', '', s))
    return striphtml(hits[0]['url'])
Example #26
def main():
    print("Processing")

    movies = common.read_json("tunefind.json")

    pool = Pool(5)
    results = [pool.apply_async(process_movie, [m]) for m in movies]

    updated_movies = []
    for w in results:
        w.wait()
        updated_movies.append(w.get())

    common.write_json("musicbrainz.json", updated_movies)
Example #27
 def __init__(self,
              cascade_path,
              dst_dir,
              image_size=200,
              margin=0.2,
              max_scale=1.2):
     min_size = int(image_size / (1.0 + margin) / max_scale)
     self._detector = AnimeFaceDetector(cascade_path, min_size=min_size)
     self.dst_dir = dst_dir
     os.makedirs(self.dst_dir, exist_ok=True)
     self._image_size = (image_size, image_size)
     self._margin = margin
     rects = read_json(os.path.join(dst_dir, self._CACHE_FILENAME))
     self._rects = dict() if rects is None else rects
Example #28
def check_error(experiment_name, model_name, specific_params, path_prefix):
    if not check_file_exists(path_prefix, 'errors.json'):
        return False
    logged_errors = read_json(path_prefix, 'errors.json')
    if experiment_name not in logged_errors:
        return False
    if model_name not in logged_errors[experiment_name]:
        return False
    errors = logged_errors[experiment_name][model_name]

    # check_func(err) builds a predicate over a single (key, value) pair of
    # specific_params; an error entry matches when every pair agrees with it.
    check_func = lambda err: lambda kv: err.get(kv[0]) == kv[1]
    if specific_params.get('kind') == 'ratio':
        # For 'ratio' runs the concrete memory budget varies between trials,
        # so ignore the 'memory_budget' field when matching.
        check_func = lambda err: lambda kv: err.get(kv[0]) == kv[1] if kv[0] != 'memory_budget' else True

    return any(map(lambda err: all(map(check_func(err), specific_params.items())), errors))
Example #29
def _check_stage_status(target_status_dir, stage_name):
    filename = '{}.json'.format(stage_name)
    if not check_file_exists(target_status_dir, filename):
        return {
            'success': False,
            'message': '{} stage status missing'.format(stage_name)
        }

    try:
        return read_json(target_status_dir, filename)
    except:
        return {
            'success': False,
            'message': 'Failed to parse {} stage status'.format(stage_name)
        }
Example #30
def delete_duplication():
    entries = common.read_json(OUTPUT_DIR, ENTRIES_FILE_NAME)
    fixed_entries = []
    titles = []
    index = 1
    for entry in entries:
        title = entry['title']
        if title in titles: continue
        fixed = entry
        fixed['id'] = index
        fixed_entries.append(fixed)
        titles.append(title)
        index += 1

    common.write_json(fixed_entries, OUTPUT_DIR, 'entries_fixed.json')
Example #31
def log_error(experiment_name, model_name, specific_params, inp, err_msg,
              path_prefix):
    err_info = {'input': inp, 'msg': err_msg}

    logged_errors = {}
    if check_file_exists(path_prefix, 'errors.json'):
        logged_errors = read_json(path_prefix, 'errors.json')
    if experiment_name not in logged_errors:
        logged_errors[experiment_name] = {}
    if model_name not in logged_errors[experiment_name]:
        logged_errors[experiment_name][model_name] = []
    logged_errors[experiment_name][model_name].append({
        'err_info': err_info,
        **specific_params
    })
    write_json(path_prefix, 'errors.json', logged_errors)
Example #32
def load_from_web():
    print("Loading from Web")

    movies = common.read_json(JSON_IN_FILE)

    pool = Pool(5)
    worker = [pool.apply_async(process_movie, [m]) for m in movies]

    imdb_movies = []
    for w in worker:
        w.wait()
        result = w.get()
        if result is not None:
            imdb_movies.append(w.get())

    common.write_json(JSON_OUT_FILE, imdb_movies)
Example #33
def load_from_web():
    print("Loading from Web")

    movies = common.read_json(JSON_IN_FILE)

    pool = Pool(5)
    worker = [pool.apply_async(process_movie, [m]) for m in movies]

    imdb_movies = []
    for w in worker:
        w.wait()
        result = w.get()
        if result is not None:
            imdb_movies.append(w.get())

    common.write_json(JSON_OUT_FILE, imdb_movies)
Example #34
def convert_to_rdf():
    print("")
    print("Convert to RDF...")

    charts = common.read_json(JSON_OUT_FILE)

    g = Graph()
    g.bind("", NS_CHARTS)
    g.bind("dbpedia-owl", NS_DBPEDIA_OWL)
    g.bind("dbpprop", NS_DBPPROP)

    for c in charts:
        if c["date"] < "2005-01-01T00:00:00":
            continue

        chart = URIRef(BASE_URI % common.encodeString(
            datetime.strptime(c["date"],
                              "%Y-%m-%dT%H:%M:%S").strftime("%Y-%m-%d")))
        g.add((chart, RDF.type, NS_CHARTS.Chart))
        g.add((chart, NS_DBPEDIA_OWL.publicationDate,
               Literal(c["date"] + "Z", datatype=XSD.dateTime)))

        for t in c["tracks"]:
            artist = URIRef(BASE_URI % common.encodeString(t["artist"]))
            g.add((artist, RDF.type, NS_DBPEDIA_OWL.MusicalArtist))
            g.add((artist, RDFS.label, Literal(t["artist"])))
            g.add((artist, NS_DBPPROP.name, Literal(t["artist"])))

            song = URIRef(BASE_URI % common.encodeString(
                u"{0:s} - {1:s}".format(t['artist'], t["title"])))
            g.add((song, RDF.type, NS_DBPEDIA_OWL.Song))
            g.add((song, RDFS.label,
                   Literal(u"{0:s} - {1:s}".format(t['artist'], t["title"]))))
            g.add((song, NS_DBPPROP.title, Literal(t["title"])))
            g.add((song, NS_DBPEDIA_OWL.artist, artist))

            ranked = BNode()
            g.add((ranked, RDF.type, NS_CHARTS.RankedSong))
            g.add((ranked, NS_CHARTS.song, song))
            g.add((ranked, NS_CHARTS.position,
                   Literal(t["pos"], datatype=XSD.integer)))
            g.add((ranked, RDFS.label,
                   Literal(u"{0:s}: {1:s} - {2:s}".format(
                       t["pos"], t['artist'], t["title"]))))
            g.add((chart, NS_CHARTS.rankedSong, ranked))

    common.write_rdf(RDF_OUT_FILE, g)
Example #35
def convert_to_rdf():
    """
    Converts the read data to triples
    """

    print ""
    print "Convert to RDF..."

    charts = common.read_json(JSON_OUT_FILE)

    g = Graph()
    g.bind("", NS_CHARTS)
    g.bind("dbpedia-owl", NS_DBPEDIA_OWL)
    g.bind("dbpprop", NS_DBPPROP)

    for c in charts:

        if c["date"] < CONVERT_FROM_DATE:
            continue

        chart = URIRef(
            BASE_URI % common.encodeString(datetime.strptime(c["date"], "%Y-%m-%dT%H:%M:%S").strftime("%Y-%m-%d")))
        g.add((chart, RDF.type, NS_CHARTS.Chart))
        g.add((chart, NS_DBPEDIA_OWL.publicationDate, Literal(c["date"] + "Z", datatype=XSD.dateTime)))

        for t in c["tracks"]:
            artist = URIRef(BASE_URI % common.encodeString(t["artist"]))
            g.add((artist, RDF.type, NS_DBPEDIA_OWL.MusicalArtist))
            g.add((artist, RDFS.label, Literal(t["artist"])))
            g.add((artist, NS_DBPPROP.name, Literal(t["artist"])))

            song = URIRef(BASE_URI % common.encodeString(u"{0:s} - {1:s}".format(t['artist'], t["title"])))
            g.add((song, RDF.type, NS_DBPEDIA_OWL.Song))
            g.add((song, RDFS.label, Literal(u"{0:s} - {1:s}".format(t['artist'], t["title"]))))
            g.add((song, NS_DBPPROP.title, Literal(t["title"])))
            g.add((song, NS_DBPEDIA_OWL.artist, artist))

            ranked = BNode()
            g.add((ranked, RDF.type, NS_CHARTS.RankedSong))
            g.add((ranked, NS_CHARTS.song, song))
            g.add((ranked, NS_CHARTS.position, Literal(t["pos"], datatype=XSD.integer)))
            g.add((ranked, RDFS.label, Literal(u"{0:s}: {1:s} - {2:s}".format(t["pos"], t['artist'], t["title"]))))

            g.add((chart, NS_CHARTS.rankedSong, ranked))

    common.write_rdf(RDF_OUT_FILE, g)
Example #36
 def read(self, var):
     if var == "room":
         self.room = common.read_json("animal_crossing/data/room.json", {})
     elif var == "member":
         self.member = common.read_json("animal_crossing/data/member.json", {})
     elif var == "ban":
         self.ban = common.read_json("animal_crossing/data/ban.json", {})
     elif var == "count":
         self.count = common.read_json("animal_crossing/data/count.json", {"count": 0})
     elif var == "queue":
         self.queue = common.read_json("animal_crossing/data/queue.json", {})
     elif var == "group_member":
         self.group_member = common.read_json("animal_crossing/data/group_member.json", {})
Example #37
def load_from_web():
    print("Loading from Web...")

    network = pylast.LastFMNetwork(api_key=API_KEY, api_secret=API_SECRET)
    movies = common.read_json(JSON_IN_FILE)

    song_chunks = []
    for m in movies:
        if len(m["soundtrack"]) > 0:
            song_chunks.append(m["soundtrack"])

    pool = Pool(5)
    worker = [pool.apply_async(process_songs, [chunk, network]) for chunk in song_chunks]

    lastfm_songs = []
    for w in worker:
        w.wait()
        for s in w.get():
            lastfm_songs.append(s)

    common.write_json(JSON_OUT_FILE, lastfm_songs)
Example #38
def convert_to_rdf():
    """
    Converts the read data to triples
    """

    print ""
    print "Convert to RDF..."

    movies = common.read_json(JSON_OUT_FILE)

    g = Graph()
    g.bind("", NS_TUNEFIND)
    g.bind("dbpedia-owl", NS_DBPEDIA_OWL)
    g.bind("dbpprop", NS_DBPPROP)

    for m in movies:
        movie = URIRef(BASE_URI % common.encodeString(m["title"]))
        g.add((movie, RDF.type, NS_DBPEDIA_OWL.Film))
        g.add((movie, RDFS.label, Literal(m["title"])))
        g.add((movie, NS_DBPPROP.title, Literal(m["title"])))

        for s in m["soundtrack"]:
            artist = URIRef(BASE_URI % common.encodeString(s["artist"]))
            g.add((artist, RDF.type, NS_DBPEDIA_OWL.MusicalArtist))
            g.add((artist, RDFS.label, Literal(s["artist"])))
            g.add((artist, NS_DBPPROP.name, Literal(s["artist"])))

            song = URIRef(BASE_URI % common.encodeString(u"{0:s} - {1:s}".format(s['artist'],  s["title"])))
            g.add((song, RDF.type, NS_DBPEDIA_OWL.Song))
            g.add((song, RDFS.label, Literal(u"{0:s} - {1:s}".format(s['artist'],  s["title"]))))
            g.add((song, NS_DBPPROP.title, Literal(s["title"])))
            g.add((song, NS_DBPEDIA_OWL.artist, artist))

            g.add((movie, NS_TUNEFIND.contains, song))

    common.write_rdf(RDF_OUT_FILE, g)
Example #39
def read_tasklist(path):
    data = read_json(path)
    if data is None:
        return set(), [], 1
    # Next task number; an empty task list starts numbering at 1.
    num = max((t['num'] for t in data['tasks']), default=0) + 1
    return set(data['tags']), data['tasks'], num
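For illustration, a hypothetical task-list file that read_tasklist would accept; only the tags list, the tasks list, and each task's num field are used by the code above, the remaining fields are guesses.

{
    "tags": ["home", "work"],
    "tasks": [
        {"num": 1, "tags": ["home"], "text": "water the plants"},
        {"num": 2, "tags": ["work"], "text": "file the report"}
    ]
}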
Example #40
def convert_to_rdf():
    """
    Converts the read data to triples
    """

    print ""
    print "Convert to RDF..."

    movies = common.read_json(JSON_OUT_FILE)

    g = Graph()
    g.bind("", NS_IMDB)
    g.bind("dbpedia-owl", NS_DBPEDIA_OWL)
    g.bind("dbpprop", NS_DBPPROP)

    for m in movies:
        if not release_filter(m):
            continue

        movie = URIRef(BASE_URI % common.encodeString(m["title"]))
        g.add((movie, RDF.type, NS_DBPEDIA_OWL.Film))
        g.add((movie, RDFS.label, Literal(m["title"])))
        g.add((movie, NS_DBPPROP.title, Literal(m["title"])))
        g.add((movie, NS_DBPEDIA_OWL.imdbId, Literal(m["imdbID"])))

        if "directors" in m:
            for name in m["directors"]:
                director = URIRef(BASE_URI % common.encodeString(name))
                g.add((director, RDF.type, NS_DBPEDIA_OWL.Person))
                g.add((director, RDFS.label, Literal(name)))
                g.add((director, NS_DBPPROP.name, Literal(name)))
                g.add((movie, NS_DBPEDIA_OWL.director, director))

        if "cast" in m:
            for cast in m["cast"][:CONVERT_MAX_CAST]:
                if cast["screen_name"] == "":
                    continue

                actor = URIRef(BASE_URI % common.encodeString(cast["name"]))
                g.add((actor, RDF.type, NS_DBPEDIA_OWL.Actor))
                g.add((actor, RDFS.label, Literal(cast["name"])))
                g.add((actor, NS_DBPPROP.name, Literal(cast["name"])))

                character = BNode()
                g.add((character, RDF.type, NS_IMDB.Character))
                g.add((character, RDFS.label, Literal(cast["screen_name"])))
                g.add((character, NS_IMDB.actedBy, actor))
                g.add((character, NS_IMDB.screenName, Literal(cast["screen_name"])))
                g.add((movie, NS_IMDB.cast, character))

        if "release_info" in m:
            for info in m["release_info"]:
                if "date" not in info:
                    continue

                if info["country"] not in CONVERT_RELEASE_COUNTRY:
                    continue

                release = BNode()
                g.add((release, RDF.type, NS_IMDB.ReleaseCountry))
                g.add((release, RDFS.label,
                       Literal(info["country"] if info["event"] == "" else info["country"] + " - " + info["event"])))
                g.add((release, NS_DBPEDIA_OWL.publicationDate, Literal(info["date"] + "Z", datatype=XSD.dateTime)))
                g.add((release, NS_DBPEDIA_OWL.comment, Literal(info["event"])))
                g.add((release, NS_DBPEDIA_OWL.country,
                       URIRef("http://dbpedia.org/resource/%s" % common.encodeString(info["country"]))))
                g.add((movie, NS_IMDB.releasedIn, release))

    common.write_rdf(RDF_OUT_FILE, g)
Example #41
    def load_data(self, dfile):
        try:
            self.data = common.read_json(dfile)
        except Exception:
            raise
Example #42
def run(args):
    domain = args.domain
    outfile = args.domain + '_wy.txt'

    if not domain:
        print('usage: wydomain.py -d aliyun.com')
        sys.exit(1)

    # init _cache_path
    script_path = os.path.dirname(os.path.abspath(__file__))
    _cache_path = os.path.join(script_path, 'result/{0}'.format(domain))
    if not os.path.exists(_cache_path):
        os.makedirs(_cache_path, 0o777)

    # alexa result json file
    logging.info("starting alexa fetcher...")
    _cache_file = os.path.join(_cache_path, 'alexa.json')
    result = Alexa(domain=domain).run()
    save_result(_cache_file, result)
    logging.info("alexa fetcher subdomains({0}) successfully...".format(len(result)))

    # threatminer result json file
    logging.info("starting threatminer fetcher...")
    _cache_file = os.path.join(_cache_path, 'threatminer.json')
    result = Threatminer(domain=domain).run()
    save_result(_cache_file, result)
    logging.info("threatminer fetcher subdomains({0}) successfully...".format(len(result)))

    # threatcrowd result json file
    logging.info("starting threatcrowd fetcher...")
    _cache_file = os.path.join(_cache_path, 'threatcrowd.json')
    result = Threatcrowd(domain=domain).run()
    save_result(_cache_file, result)
    logging.info("threatcrowd fetcher subdomains({0}) successfully...".format(len(result)))

    # sitedossier result json file
    logging.info("starting sitedossier fetcher...")
    _cache_file = os.path.join(_cache_path, 'sitedossier.json')
    result = Sitedossier(domain=domain).run()
    save_result(_cache_file, result)
    logging.info("sitedossier fetcher subdomains({0}) successfully...".format(len(result)))

    # netcraft result json file
    logging.info("starting netcraft fetcher...")
    _cache_file = os.path.join(_cache_path, 'netcraft.json')
    result = Netcraft(domain=domain).run()
    save_result(_cache_file, result)
    logging.info("netcraft fetcher subdomains({0}) successfully...".format(len(result)))

    # ilinks result json file
    logging.info("starting ilinks fetcher...")
    _cache_file = os.path.join(_cache_path, 'ilinks.json')
    result = Ilinks(domain=domain).run()
    save_result(_cache_file, result)
    logging.info("ilinks fetcher subdomains({0}) successfully...".format(len(result)))

    # chaxunla result json file
    logging.info("starting chaxunla fetcher...")
    _cache_file = os.path.join(_cache_path, 'chaxunla.json')
    result = Chaxunla(domain=domain).run()
    save_result(_cache_file, result)
    logging.info("chaxunla fetcher subdomains({0}) successfully...".format(len(result)))

    # google TransparencyReport result json file
    logging.info("starting google TransparencyReport fetcher...")
    result = TransparencyReport(domain=domain).run()
    _cache_file = os.path.join(_cache_path, 'googlect_subject.json')
    save_result(_cache_file, result.get('subjects'))
    _cache_file = os.path.join(_cache_path, 'googlect_dnsnames.json')
    save_result(_cache_file, result.get('dns_names'))
    logging.info("google TransparencyReport fetcher subdomains({0}) successfully...".format(len(result.get('dns_names'))))

    # Collection API Subdomains
    sub_files = [
        'alexa.json', 
        'chaxunla.json', 
        'ilinks.json', 
        'netcraft.json', 
        'sitedossier.json',
        'threatcrowd.json',
        'threatminer.json']

    # process all cache files
    subdomains = []
    for file in sub_files:
        _cache_file = os.path.join(_cache_path, file)
        json_data = read_json(_cache_file)
        if json_data:
            subdomains.extend(json_data)

    # process openssl x509 dns_names
    _cache_file = os.path.join(_cache_path, 'googlect_dnsnames.json')
    json_data = read_json(_cache_file)
    for sub in json_data:
        if sub.endswith(domain):
            subdomains.append(sub)

    # collection burte force subdomains
    _burte_file = os.path.join(_cache_path, 'dnsburte.json')
    if FileUtils.exists(_burte_file):
        json_data = read_json(_burte_file)
        if json_data:
            subdomains.extend(json_data)

    # save all subdomains to outfile
    subdomains = list(set(subdomains))
    _result_file = os.path.join(script_path, outfile)
    save_result(_result_file, subdomains)
    logging.info("{0} {1} subdomains save to {2}".format(
        domain, len(subdomains), _result_file))
Example #43
    def initialize(self):
        self.wait_for_message = False
        self.active_threads = []

        self.settings = read_json(read_file_or_die('config/general.json'))
Example #44
# encoding: utf-8

import os
from tools.skynet import SkynetDomain
from utils.fileutils import FileUtils
from common import read_json

# init path
script_path = os.path.dirname(os.path.abspath(__file__))
result_file = os.path.join(script_path, 'domains.log')

# upload subdomains dict to skynet.
_subdomains = read_json(result_file)
skynet = SkynetDomain()
skynet.bulk_sync(_subdomains)