def scrape(path):
    """Look up OMDB metadata for each film listed in the JSON file at *path*.

    Returns the subset of films for which OMDB returned a 'Title' entry,
    with the raw response attached under ``film['metadata']``.
    """
    films = util.read_json(path)
    newfilms = []
    for film in films:
        try:
            # OMDB title matching works better without embedded quotes
            film['name'] = film['name'].replace('"', '')
            url = 'http://www.omdbapi.com/?t=' + film['name'] + '&y=&plot=full&r=json'
            print("Scraping " + url)
            r = requests.get(url)
            response = json.loads(r.text)
            if 'Title' in response:
                # Film data found!
                film['metadata'] = response
                newfilms.append(film)
                print(response['Title'])
        except (requests.RequestException, ValueError, KeyError) as exc:
            # Best-effort scrape: skip films whose lookup or parse fails,
            # but report why instead of silently swallowing the error.
            print("Skipping film: %r" % (exc,))
    return newfilms
def __init__(self, unittest=False):
    """Constructor. Only executed once, since this is a
    :class:`~src.util.Singleton`. Reads and parses data in CMIP6_CV.json.
    """
    if unittest:
        # value not used, when we're testing will mock out call to read_json
        # below with actual translation table to use for test
        file_ = 'dummy_filename'
    else:
        paths = core.PathManager()
        file_ = os.path.join(paths.CODE_ROOT, 'data',
            'cmip6-cmor-tables', 'Tables', 'CMIP6_CV.json')
    self._contents = util.read_json(file_, log=_log)
    # keep only the controlled-vocabulary payload of the file
    self._contents = self._contents['CV']
    for k in ['product', 'version_metadata', 'required_global_attributes',
        'further_info_url', 'Conventions', 'license']:
        # remove unnecessary information
        del self._contents[k]
    # munge table_ids: replace the list of table names with a mapping
    # table name -> dict of attributes parsed out of the name
    self._contents['table_id'] = dict.fromkeys(self._contents['table_id'])
    for tbl in self._contents['table_id']:
        self._contents['table_id'][tbl] = dc.asdict(CMIP6_MIPTable(tbl))
    # caches; presumably populated lazily by lookup methods elsewhere in
    # the class — verify before relying on their contents here
    self.cv = dict()
    self._lookups = dict()
def __init__(self, unittest=False):
    """Read and parse the CMIP6 controlled vocabulary (CMIP6_CV.json)."""
    if unittest:
        # placeholder path; tests mock out the util.read_json call below
        # and supply the translation table directly
        cv_path = 'dummy_filename'
    else:
        config = util_mdtf.ConfigManager()
        cv_path = os.path.join(
            config.paths.CODE_ROOT, 'src',
            'cmip6-cmor-tables', 'Tables', 'CMIP6_CV.json'
        )
    # keep only the controlled-vocabulary payload
    contents = util.read_json(cv_path)['CV']
    # drop entries this class never consults
    for unused_key in (
        'product', 'version_metadata', 'required_global_attributes',
        'further_info_url', 'Conventions', 'license'
    ):
        del contents[unused_key]
    # munge table_ids: list of table names -> {name: parsed id attributes}
    contents['table_id'] = {
        tbl: parse_mip_table_id(tbl) for tbl in contents['table_id']
    }
    self._contents = contents
    self.cv = dict()
    self._lookups = dict()
def load(filename):
    """Parse the JSON instance file at *filename*.

    Returns the values ``[n, m, clients]`` in that order.
    """
    instance = read_json(filename)
    return [instance[key] for key in ('n', 'm', 'clients')]
def set_cli_defaults(code_root, cli_config, install_config):
    """Write install-time configuration options to the cli.jsonc file used
    to set run-time default values.

    Reads the template at cli_config['config_in'], copies the values of
    cli_config['default_keys'] out of *install_config*, and writes the
    result to cli_config['config_out'] (replacing any existing file).
    Any failure is routed through fatal_exception_handler.
    """
    in_path = os.path.join(code_root, cli_config['config_in'])
    out_path = os.path.join(code_root, cli_config['config_out'])
    print("Writing default settings to {}".format(out_path))
    try:
        cli_template = util.read_json(in_path)
    except Exception as exc:
        fatal_exception_handler(exc, "ERROR: Couldn't read {}.".format(in_path))
    for key in cli_config['default_keys']:
        try:
            # missing keys in install_config are treated as fatal
            cli_template[key] = install_config[key]
        except Exception as exc:
            fatal_exception_handler(exc, "ERROR: {} not set".format(key))
    if os.path.exists(out_path):
        print("{} exists; overwriting".format(out_path))
        os.remove(out_path)
    try:
        util.write_json(cli_template, out_path, sort_keys=False)
    except Exception as exc:
        fatal_exception_handler(exc, "ERROR: Couldn't write {}.".format(out_path))
def __init__(self, code_root=None, unittest=False):
    """Create empty convention/alias registries and load the variable
    modifiers table from data/modifiers.jsonc under *code_root*."""
    self._unittest = unittest
    self.conventions = util.WormDict()
    self.aliases = util.WormDict()
    modifier_path = os.path.join(code_root, 'data', 'modifiers.jsonc')
    self.modifier = util.read_json(modifier_path, log=_log)
def __init__(self, unittest=False, verbose=0):
    """Load every fieldlist_*.jsonc translation table, plus a built-in
    'CF' convention that performs no translation.

    Args:
        unittest: if True, use a placeholder file list (tests mock out the
            util.read_json call below).
        verbose: if > 0, print each convention name as it is registered.

    Raises:
        ConventionError: if two fieldlist files declare the same convention.
    """
    if unittest:
        # value not used, when we're testing will mock out call to read_json
        # below with actual translation table to use for test
        config_files = ['dummy_filename']
    else:
        config = ConfigManager()
        glob_pattern = os.path.join(
            config.paths.CODE_ROOT, 'src', 'fieldlist_*.jsonc'
        )
        config_files = glob.glob(glob_pattern)
    # always have CF-compliant option, which does no translation
    self.axes = {
        'CF': {
            "lon" : {"axis" : "X", "MDTF_envvar" : "lon_coord"},
            "lat" : {"axis" : "Y", "MDTF_envvar" : "lat_coord"},
            "lev" : {"axis" : "Z", "MDTF_envvar" : "lev_coord"},
            "time" : {"axis" : "T", "MDTF_envvar" : "time_coord"}
        }}
    self.variables = {'CF': dict()}
    self.units = {'CF': dict()}
    for filename in config_files:
        d = util.read_json(filename)
        # a single file may declare several convention names (aliases)
        for conv in util.coerce_to_iter(d['convention_name']):
            if verbose > 0:
                print('XXX found ', conv)
            if conv in self.variables:
                print("ERROR: convention "+conv+" defined in "+filename+" already exists")
                raise ConventionError
            self.axes[conv] = d.get('axes', dict())
            self.variables[conv] = util.MultiMap(d.get('var_names', dict()))
            self.units[conv] = util.MultiMap(d.get('units', dict()))
def end():
    """Close out the currently-running job: append its finished time span to
    today's entry in the record file, then clear the start record.

    Returns the (updated) record dict for the target day.
    """
    path = util.read_path_record()
    job_data = util.read_start()
    target_day = get_target_day()
    day = str(target_day.day)
    subject, value = job_data["subject"], job_data["value"]
    span = f"{job_data['start_time']}-{datetime.now().strftime('%H:%M')}"
    record_path = util.RECORD_DIR + path
    data = util.read_json(record_path) if os.path.exists(record_path) else {}
    # create the day -> subject -> value -> [spans] levels on demand
    date_val = data.setdefault(day, {})
    date_val.setdefault(subject, {}).setdefault(value, []).append(span)
    util.write_json(record_path, data)
    clear_start_record()
    return date_val
def __init__(self, code_root, cli_rel_path):
    """Read the framework CLI config file and pull out the case/pod lists
    before delegating the remaining settings to the base handler."""
    config = util.read_json(os.path.join(code_root, cli_rel_path))
    self.case_list = config.pop('case_list', [])
    self.pod_list = config.pop('pod_list', [])
    super(FrameworkCLIHandler, self).__init__(
        code_root, config, partial_defaults=None
    )
def calc_daily(path, day):
    """Summarize one day's records from the JSON file at RECORD_DIR + path.

    Returns a dict with the summarized data and its display string; when
    *day* has no entry, returns empty data and a Japanese "no data" message.
    """
    data = util.read_json(util.RECORD_DIR + path)
    # idiomatic membership test ('day not in', not 'not day in')
    if day not in data:
        return {"data": {}, "str": "データが存在しません。"}
    summary = _summarize(data[day])
    return {"data": summary, "str": _mk_str(summary)}
def _load_one_json(pod_):
    """Read and validate one POD's settings file.

    Returns the parsed dict. Raises util.PodConfigError with a message
    pinpointing the failure mode: missing POD directory, missing settings
    file, syntax/validation error, or any other read failure.
    """
    pod_dir = os.path.join(code_root, _pod_dir, pod_)
    settings_path = os.path.join(pod_dir, _file_name)
    try:
        d = util.read_json(settings_path)
        # both top-level sections are mandatory in a settings file
        for section in ['settings', 'varlist']:
            if section not in d:
                raise AssertionError(
                    f"'{section}' entry not found in '{_file_name}'.")
    except util.MDTFFileNotFoundError as exc:
        # distinguish "POD directory missing" from "settings file missing"
        if not os.path.isdir(pod_dir):
            raise util.PodConfigError(
                (f"'{pod_}' directory not found in "
                    f"'{os.path.join(code_root, _pod_dir)}'."),
                pod_)
        elif not os.path.isfile(settings_path):
            raise util.PodConfigError((f"'{_file_name}' file not found in "
                f"'{pod_dir}'."), pod_)
        else:
            # file exists but read still failed; propagate original error
            raise exc
    except (JSONDecodeError, AssertionError) as exc:
        raise util.PodConfigError((f"Syntax error in '{_file_name}': "
            f"{str(exc)}."), pod_)
    except Exception as exc:
        raise util.PodConfigError(
            (f"Error encountered in reading '{_file_name}': "
                f"{repr(exc)}."), pod_)
    return d
def build_tropes():
    """Merge trope attributes (images, descriptions, adjectives) for both
    genders into a single dict keyed by trope id.

    Prints an error when the same trope appears with conflicting genders.
    """
    files = {
        'data/results/images/male/results.json': {'type': 'img', 'gender': 'm'},
        'data/results/images/female/results.json': {'type': 'img', 'gender': 'f'},
        'data/results/only_tropes-male.json': {'type': 'desc', 'gender': 'm'},
        'data/results/only_tropes-female.json': {'type': 'desc', 'gender': 'f'},
        'data/results/tropes_adjectives-female.json': {'type': 'adjs', 'gender': 'f'},
        'data/results/tropes_adjectives-male.json': {'type': 'adjs', 'gender': 'm'}
    }
    all_tropes = {}
    # .items() / print() instead of Py2-only .iteritems() / print statements,
    # so this function also runs under Python 3 (identical behavior on Py2)
    for filename, options in files.items():
        attr_type = options['type']
        data = util.read_json(filename)
        attrs = map_array(data, attr_type)
        for k, v in attrs.items():
            if k not in all_tropes:
                all_tropes[k] = {}
            all_tropes[k].update(v)
            if 'gender' not in v:
                all_tropes[k]['gender'] = options['gender']
            elif v['gender'] != options['gender']:
                print("ERROR: genders don't match")
    return all_tropes
def _load_one_json(pod):
    """Best-effort read of one POD's settings file; returns {} on failure."""
    settings_path = os.path.join(code_root, _pod_dir, pod, _pod_settings)
    d = dict()
    try:
        d = util.read_json(settings_path)
        assert 'settings' in d
    except Exception:
        pass  # better error handling?
    return d
def plot(json_path, save_path):
    """Plot aggregated record data and return the path the figure was saved to.

    Args:
        json_path: record file name relative to RECORD_DIR, or None to use
            the current path record.
        save_path: figure file name relative to FIGURE_DIR, or None to derive
            it from the current path record (note: not from *json_path*).
    """
    if json_path is None:
        json_path = util.read_path_record()
    if save_path is None:
        save_path = util.read_path_record().replace(".json", ".png")
    data = util.read_json(util.RECORD_DIR + json_path)
    data = _aggregate(data)
    # NOTE(review): save_path was just defaulted above, so the None branch
    # here looks dead — confirm whether read_path_record() can return None
    save_path = None if save_path is None else util.FIGURE_DIR + save_path
    _plot_data(data, save_path)
    return save_path
def _load_one_json(pod):
    """Read one POD's settings file; wrap any failure in PodConfigError."""
    settings_path = os.path.join(code_root, _pod_dir, pod, _pod_settings)
    try:
        d = util.read_json(settings_path)
        assert 'settings' in d
    except Exception as exc:
        raise util.PodConfigError(
            "Syntax error encountered when reading settings.jsonc.", pod) from exc
    return d
def __init__(self, code_root, settings_file):
    """Load installer settings and seed the config with shared conda keys."""
    self.code_root = code_root
    raw = util.read_json(os.path.join(code_root, settings_file))
    self.settings = util.NameSpace.fromDict(raw['settings'])
    self.cli_settings = raw['cli']
    shared_conda = {k: self.settings.conda[k] for k in self._shared_conda_keys}
    self.config = util.NameSpace.fromDict(shared_conda)
    # resolve the conda init script relative to the code root
    self.settings.conda['init_script'] = os.path.join(
        code_root, self.settings.conda['init_script'])
def edit(path, val, day):
    """Overwrite one day's entry in the record file with *val*.

    *path* and *day* default to the current path record and target day.
    """
    print(val)
    if path is None:
        path = util.read_path_record()
    if day is None:
        day = str(get_target_day().day)
    record_path = util.RECORD_DIR + path
    json_data = util.read_json(record_path)
    json_data[day] = val
    util.write_json(record_path, json_data)
def get_record(path, day):
    """Return the record dict for *day*, or {} when the file or day is missing."""
    if path is None:
        path = util.read_path_record()
    record_path = util.RECORD_DIR + path
    if not os.path.exists(record_path):
        return {}
    data = util.read_json(record_path)
    return data.get(day, {})
def get_test_data_configuration():
    """Read pod_test_configs.json and derive per-case output dir names plus
    flat lists of all models and pods; returns the augmented config dict."""
    cwd = os.path.dirname(os.path.realpath(__file__))  # dir of this script
    case_list = read_json(os.path.join(cwd, 'pod_test_configs.json'))
    models = []
    pods = []
    for case in case_list['case_list']:
        # mutates the case dicts inside case_list in place
        case['dir'] = 'MDTF_{}_{}_{}'.format(
            case['CASENAME'], case['FIRSTYR'], case['LASTYR'])
        models.append(case['CASENAME'])
        pods.extend(case['pod_list'])
    case_list['pods'] = pods
    case_list['models'] = models
    return case_list
def __init__(self, case_dict, parent):
    """Initialize the data source, then read and parse the user's config file."""
    self.catalog = None
    self._config = collections.defaultdict(dict)
    self.config_by_id = dict()
    self._has_user_metadata = None
    super(ExplicitFileDataSource, self).__init__(case_dict, parent)

    # Read config file; parse contents into ExplicitFileDataSourceConfigEntry
    # objects and store in self.config_by_id
    # (self.attrs is presumably populated by the superclass __init__ above —
    # the assert guards that assumption)
    assert (hasattr(self, 'attrs') and hasattr(self.attrs, 'config_file'))
    config = util.read_json(self.attrs.config_file, log=self.log)
    self.parse_config(config)
def __init__(self, code_root, cli_config, partial_defaults=None):
    """Build the CLI argument parser from a config dict or config file path.

    Args:
        code_root: base path used to resolve a relative *cli_config* path.
        cli_config: parsed CLI configuration dict, or a path (str) to a
            JSON file containing it.
        partial_defaults: optional pre-set default values.
    """
    self.code_root = code_root
    self.config = dict()
    self.partial_defaults = partial_defaults
    self.parser_groups = dict()
    # no way to get this from public interface? _actions of group
    # contains all actions for entire parser
    self.parser_args_from_group = collections.defaultdict(list)
    # manually track args requiring custom postprocessing (even if default
    # is used, so can't do with action=.. in argument)
    self.custom_types = collections.defaultdict(list)
    if isinstance(cli_config, six.string_types):
        # we were given a path to config file, instead of file's contents
        if not os.path.isabs(cli_config):
            cli_config = os.path.join(code_root, cli_config)
        cli_config = util.read_json(cli_config)
    self.parser = self.make_parser(cli_config)
def read_conventions(self, code_root, unittest=False):
    """
    Read in the conventions from the Fieldlists and populate the convention
    attribute. Files that fail to load are logged and skipped.
    """
    if unittest:
        # value not used, when we're testing will mock out call to read_json
        # below with actual translation table to use for test
        config_files = []
    else:
        config_files = glob.glob(
            os.path.join(code_root, 'data', 'fieldlist_*.jsonc'))
    for path in config_files:
        try:
            self.add_convention(util.read_json(path, log=_log))
        except Exception as exc:
            _log.exception(
                "Caught exception loading fieldlist file %s: %r", path, exc)
            continue
def get_config(self, args=None):
    """Assemble install-time configuration from the CLI template plus any
    parsed command-line *args*, storing the result in self.config."""
    # assemble from CLI
    cli_dict = util.read_json(
        os.path.join(self.code_root, self.settings.cli_defaults['template']))
    # overlay installer-specific CLI settings onto the template
    for key, val in iter(self.cli_settings.items()):
        cli_dict[key] = val
    # filter only the defaults we're setting
    for arg_gp in cli_dict['argument_groups']:
        arg_gp['arguments'] = [
            arg for arg in arg_gp['arguments'] \
            if arg['name'] in self.settings.cli_defaults['default_keys']
        ]
    cli_obj = InstallCLIHandler(self.code_root, cli_dict, partial_defaults=self.config)
    cli_obj.parse_cli(args)
    self.config = util.NameSpace.fromDict(cli_obj.config)
def __init__(self, code_root=None, unittest=False):
    """Create empty registries and load every fieldlist_*.jsonc convention."""
    self._unittest = unittest
    self.conventions = util.WormDict()
    self.aliases = util.WormDict()
    if unittest:
        # value not used, when we're testing will mock out call to read_json
        # below with actual translation table to use for test
        fieldlist_files = []
    else:
        fieldlist_files = glob.glob(
            os.path.join(code_root, 'data', 'fieldlist_*.jsonc'))
    for path in fieldlist_files:
        try:
            self.add_convention(util.read_json(path))
        except Exception as exc:
            _log.exception("Caught exception loading fieldlist file %s: %r",
                path, exc)
            continue
def main():
    """Re-key record/test_data.json: split each 'subject/detail' key into two
    nested levels and comma-separated values into lists, then write the
    result to record/sample_data.json."""
    data = util.read_json("record/test_data.json")
    new_dict = OrderedDict()
    for day_key, day_entries in data.items():
        days_dict = OrderedDict()
        for subj, raw_val in day_entries.items():
            # split "subject/detail" into its two levels ("" when no detail)
            parts = subj.split("/", 1)
            if len(parts) == 1:
                parts.append("")
            subject, detail = parts
            days_dict.setdefault(subject, OrderedDict())[detail] = raw_val.split(",")
        new_dict[day_key] = days_dict
    print(new_dict)
    util.write_json("record/sample_data.json", new_dict)
def predict(self, use_saved_model=False, threshold=0.5):
    """Run prediction and save the prediction results.

    Args:
        use_saved_model: if True, load the saved model and its feature list
            from the model config file before predicting.
        threshold: probability cutoff passed through to result saving.
    """
    if use_saved_model:
        config = read_json(self._model_config)
        saved_model_path, use_features = config['model_name'], config[
            'features']
        self._clf.load_model(saved_model_path)
        self.feature_used_name = use_features
    predict_data = self.load_predict_data()
    # keep the id column aside, then drop it from the feature frame
    user_id = predict_data[self._id_name]
    columns_drop(predict_data, [self._id_name])
    # restrict to the features the model was trained with
    predict_data = predict_data[self.feature_used_name]
    X_test = predict_data.values
    self.predict_label, self.predict_proba = self._clf.predict(X_test)
    self._save_predict_data(user_id, threshold)
def get_configuration(config_file='', check_input=False, check_output=False):
    """Load the MDTF settings JSON and return the parsed config dict.

    Falls back to src/mdtf_settings.json when *config_file* is empty, and
    adds the checksums directory to config['paths']['md5_path'].
    """
    # Redundant with code in util; need to fix this
    cwd = os.path.dirname(os.path.realpath(__file__)) # gets dir of currently executing script
    code_root = os.path.realpath(os.path.join(cwd, '..')) # parent dir of that
    if config_file == '':
        config_file = os.path.join(cwd,'..','src','mdtf_settings.json') # default
    config = read_json(config_file)
    config = parse_mdtf_args(None, config, rel_paths_root=code_root)
    config['paths']['md5_path'] = os.path.join(cwd,'checksums')
    # NOTE(review): the path normalization and existence checks below
    # (gated on check_input / check_output) are currently disabled;
    # the parameters are accepted but have no effect
    # config['paths']['OBS_ROOT_DIR'] = os.path.realpath(config['paths']['OBS_ROOT_DIR'])
    # config['paths']['MODEL_ROOT_DIR'] = os.path.realpath(config['paths']['MODEL_ROOT_DIR'])
    # config['paths']['OUTPUT_DIR'] = os.path.realpath(config['paths']['OUTPUT_DIR'])
    # assert os.path.isdir(config['paths']['md5_path'])
    # if check_input:
    #     assert os.path.isdir(config['paths']['OBS_ROOT_DIR'])
    #     assert os.path.isdir(config['paths']['MODEL_ROOT_DIR'])
    # if check_output:
    #     assert os.path.isdir(config['paths']['OUTPUT_DIR'])
    return config
def setUp_ConfigManager(config=None, paths=None, pods=None, unittest=True):
    """Construct a ConfigManager from dummy_config.json for test setup,
    optionally overridden by *config* and *paths* dicts."""
    PodDataTuple = collections.namedtuple(
        'PodDataTuple', 'sorted_lists pod_data realm_data'
    )
    cwd = os.path.dirname(os.path.realpath(__file__))
    code_root, _ = os.path.split(cwd)
    dummy_config = read_json(os.path.join(cwd, 'dummy_config.json'))
    # apply caller overrides on top of the dummy defaults
    for overrides in (config, paths):
        if overrides:
            dummy_config.update(overrides)
    dummy_cli_obj = NameSpace.fromDict({
        'code_root': code_root,
        'config': dummy_config
    })
    dummy_pod_data = PodDataTuple(
        pod_data=pods or dict(), realm_data=dict(), sorted_lists=dict()
    )
    config_mgr = util_mdtf.ConfigManager(
        dummy_cli_obj, dummy_pod_data, unittest=unittest
    )
    if paths:
        config_mgr.paths.parse(paths, list(paths.keys()))
# NOTE(review): fragment — the enclosing function's definition lies above
# this excerpt; the indentation of its tail below is reconstructed.
        data = filmutil.get_film_base(film)
        if data:
            film_details[data['id']] = data
    return film_details

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Generate the info dict for tropes')
    parser.add_argument('--src', help='Source films dictionary', required=True)
    parser.add_argument('--roles', help='Film role files for both genders', required=False, action='append')
    parser.add_argument('--dest', help='Destination folder', required=True)
    parser.add_argument('--extended', help='If true, will output extended form', required=False)
    parser.add_argument('--name', help='Specific movie to generate information on', required=False)
    args = parser.parse_args()

    films = util.read_json(args.src)
    film_details = get_details(films, args.name, args.extended)
    # role data is optional; without it the details are written as-is
    movie_roles = None
    film_roles = film_details
    if args.roles:
        movie_roles = roles.get_roles(args.roles)
        film_roles = get_roles(film_details, movie_roles)
    write_detail_files(film_roles, args.dest)
def test(path):
    """Smoke test helper: insert a fixed dummy entry into the record file."""
    record_path = util.RECORD_DIR + path
    data = util.read_json(record_path)
    data["31"] = {"test": "10:00-12:00"}
    util.write_json(record_path, data)
# NOTE(review): fragment — the start of this return statement's enclosing
# function lies above this excerpt; indentation is reconstructed.
            'nodes': adj_nodes,
            'links': adj_adj_links
        },
        'trope_adj_network': {
            # 'nodes': trope_nodes,
            'links': trope_adj_links
        }
    }

if __name__ == "__main__":
    import argparse
    import sys
    import json
    import string
    import os

    parser = argparse.ArgumentParser(description='Generate adjective info')
    parser.add_argument('--dest', help='source file', required=True)
    args = parser.parse_args()

    # combine male and female trope->adjective data into one network structure
    male_trope_adj = util.read_json('data/results/tropes_adjectives-male.json')
    female_trope_adj = util.read_json('data/results/tropes_adjectives-female.json')
    res = adjective_network(male_trope_adj, female_trope_adj)
    util.write_json(args.dest, res)
from src.trope import similar
from src.trope import trope_dictionary as t_dict


def write_tropes(tropes, dest):
    """Write each trope to its own JSON file (named by trope id) under *dest*."""
    for trope in tropes.values():
        util.write_json(join(dest, trope['id'] + '.json'), trope)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Individual trope files')
    parser.add_argument('--dest', help='Destination folder', required=True)
    args = parser.parse_args()

    # load every per-gender analysis/result file needed to assemble tropes
    films = util.read_json('data/results/films/full_with_similarity.json')
    male_adj_ll = util.read_json('data/analysis/trope_ll-male.json')
    female_adj_ll = util.read_json('data/analysis/trope_ll-female.json')
    male_trope_adj = util.read_json('data/results/tropes_adjectives-male.json')
    female_trope_adj = util.read_json('data/results/tropes_adjectives-female.json')
    male_trope_films = util.read_json('data/results/films/trope_films-male.json')
    female_trope_films = util.read_json('data/results/films/trope_films-female.json')
    film_categories = util.read_json('data/results/films/categories.json')

    # build extended info tropes
    tropes = t_dict.build_tropes()
    tropes = t_dict.extended_info(tropes)
    # NOTE(review): the script continues past this excerpt
# NOTE(review): fragment — the enclosing function's definition lies above
# this excerpt; indentation is reconstructed.
    return {
        'male': male,
        'female': female
    }

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Generate adjective gender split data')
    parser.add_argument('--dest', help='source file', required=True)
    args = parser.parse_args()

    male_trope_adj = util.read_json('data/results/tropes_adjectives-male.json')
    female_trope_adj = util.read_json('data/results/tropes_adjectives-female.json')
    male_ll = util.read_json('data/analysis/ll-male.json')
    female_ll = util.read_json('data/analysis/ll-female.json')
    male_tropes = util.read_json('data/results/tropes-male.json')
    female_tropes = util.read_json('data/results/tropes-female.json')
    # unique trope names across both genders; presumably each entry's first
    # element is the trope name — verify against the tropes-*.json schema
    alltropes = Set([t[0] for t in male_tropes + female_tropes])
    res = gender_split(male_ll, female_ll, male_trope_adj, female_trope_adj,
        len(alltropes))
    util.write_json(args.dest, res)
def parse_input(filename):
    '''Read in the json starting file at *filename* and return its contents.'''
    # bug fix: read the file that was passed in, not the module-level
    # args.src (which made the parameter dead and coupled this function
    # to the CLI script's global state)
    return util.read_json(filename)
from config import config
from src.util import read_json
from src.environment import get_environment
from src.exploration import EpsGreedy
from src.agent import TabularQFunction
from src.replay_buffer import ReplayBuffer

# NOTE(review): argparse is used below but not imported in this excerpt —
# presumably imported above; verify
parser = argparse.ArgumentParser()
parser.add_argument('-env', type=str)
args = parser.parse_args()

#######################################
# Read inputs
#######################################
# environment specification (JSON file) selected on the command line
env_input = read_json(args.env)

#######################################
# Set up simulation
#######################################
env = get_environment(env_input)
# epsilon-greedy exploration schedule driven by global config constants
explorer = EpsGreedy(num_actions=env_input['NUM_ACTIONS'],
    eps=config.EPS_START,
    eps_min=config.EPS_MIN,
    decay=config.DECAY)
# tabular Q-function sized from the environment spec
agent = TabularQFunction(state_size=env_input['STATE_SIZE'][0],
    num_actions=env_input['NUM_ACTIONS'],
    mu_init=config.Q_INIT,
    std_init=config.Q_STD)
# module-level flags: data tests are opt-in via the --data_tests CLI flag
DOING_MDTF_DATA_TESTS = ('--data_tests' in sys.argv)
DOING_SETUP = DOING_MDTF_DATA_TESTS and not DOING_TRAVIS
# All this is a workaround because tests are programmatically generated at
# import time, but the tests are skipped at runtime. We're skipping tests
# because we're not in an environment where we have the data to set them up,
# so we just throw everything in an if-block to ensure they don't get generated
# if they're going to be skipped later.
if DOING_SETUP:
    config = shared.get_configuration('', check_output=True)
    md5_path = config['paths']['md5_path']
    out_path = config['paths']['OUTPUT_DIR']
    case_list = shared.get_test_data_configuration()
    output_checksums = read_json(os.path.join(md5_path, 'checksum_output.json'))

# Python 3 has subTest; in 2.7 to avoid introducing other dependencies we use
# the advanced construction presented in https://stackoverflow.com/a/20870875
# to programmatically generate tests
class PNGTestSequenceMeta(type):
    def __new__(mcs, name, bases, test_dict):
        if DOING_SETUP:
            for case in case_list['case_list']:
                case_path = os.path.join(out_path, case['dir'])
                for pod in case['pod_list']:
                    # one generated md5-checksum test per POD's PNG output
                    test_name = "test_output_png_md5_" + pod
                    test_dict[test_name] = shared.generate_checksum_test(
                        pod, case_path, output_checksums[case['dir']],
                        # NOTE(review): call truncated at this excerpt's edge
# NOTE(review): fragment — 'n', 'smtp', 'sftp' belong to an enclosing
# function whose definition lies above this excerpt; indentation is
# reconstructed. 'n/2' as a slice bound implies Python 2 integer division.
    combined = sorted(smtp[0:n/2] + sftp[0:n/2], key=lambda x: x[1],
        reverse=True)
    combined = [trope[0] for trope in combined]
    return combined

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Generate trope lists')
    parser.add_argument('--dest', help='source file', required=True)
    parser.add_argument('--by_film_occurence', required=False, action='store_true')
    parser.add_argument('--by_ll', required=False, action='store_true')
    args = parser.parse_args()

    male_image_info = util.read_json('data/results/images/male/results.json')
    female_image_info = util.read_json('data/results/images/female/results.json')
    male_trope_info = util.read_json('data/results/only_tropes-male.json')
    female_trope_info = util.read_json('data/results/only_tropes-female.json')

    res = []
    if args.by_ll:
        # rank tropes by log-likelihood of their adjective usage
        male_ll = util.read_json('data/analysis/ll-male.json')
        female_ll = util.read_json('data/analysis/ll-female.json')
        male_trope_adj = util.read_json('data/results/tropes_adjectives-male.json')
        female_trope_adj = util.read_json('data/results/tropes_adjectives-female.json')
        res = top_N_by_ll(100, male_ll, female_ll, male_trope_adj, female_trope_adj)
        # NOTE(review): the script continues past this excerpt
DOING_SETUP = DOING_MDTF_DATA_TESTS and not DOING_TRAVIS # All this is a workaround because tests are programmatically generated at # import time, but the tests are skipped at runtime. We're skipping tests # because we're not in an environment where we have the data to set them up, # so we just throw everything in an if-block to ensure they don't get generated # if they're going to be skipped later. if DOING_SETUP: config = shared.get_configuration('', check_input=True) md5_path = config['paths']['md5_path'] obs_path = config['paths']['OBS_DATA_ROOT'] model_path = config['paths']['MODEL_DATA_ROOT'] case_list = shared.get_test_data_configuration() obs_data_checksums = read_json( os.path.join(md5_path, 'checksum_obs_data.json')) model_data_checksums = read_json( os.path.join(md5_path, 'checksum_model_data.json')) # Python 3 has subTest; in 2.7 to avoid introducing other dependencies we use # the advanced construction presented in https://stackoverflow.com/a/20870875 # to programmatically generate tests class TestSequenceMeta(type): def __new__(mcs, name, bases, test_dict): if DOING_SETUP: for pod in case_list['pods']: test_name = "test_input_checksum_" + pod test_dict[test_name] = shared.generate_checksum_test( pod, obs_path, obs_data_checksums)
# NOTE(review): fragment — tail of a trope-merging loop whose enclosing
# function (build_tropes) starts above this excerpt; indentation is
# reconstructed. Py2 constructs (print statement, iteritems) preserved.
            all_tropes[k] = {}
        all_tropes[k].update(v)
        if('gender' not in v):
            all_tropes[k]['gender'] = options['gender']
        elif(v['gender'] != options['gender']):
            print "ERROR: genders don't match"
    return all_tropes

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Generate the info dict for tropes')
    parser.add_argument('--dest', help='source file', required=True)
    parser.add_argument('--filter', required=False)
    parser.add_argument('--extended', required=False, action='store_true')
    args = parser.parse_args()

    all_tropes = build_tropes()
    if args.filter:
        # keep only tropes whitelisted in the filter file
        filter_list = util.read_json(args.filter)
        all_tropes = {k:v for (k,v) in all_tropes.iteritems() if k in filter_list}
    if args.extended:
        res = extended_info(all_tropes)
        util.write_json(args.dest, res)
    else:
        res = basic_info(all_tropes)
        util.write_json(args.dest, res)