Example #1
0
def save_outputs(output, dst_dir=None, fn_prefix=None):
    """Serialize *output* as JSON into *dst_dir*.

    Parameters
    ----------
    output : object
        Any JSON-serializable value.
    dst_dir : path-like, optional
        Destination directory; defaults to ``settings.processed_dir / 'data'``.
        Created if it does not already exist.
    fn_prefix : str, optional
        When given, the file is named ``<fn_prefix>_data.json`` instead of
        ``data.json``.
    """
    target_dir = settings.processed_dir / 'data' if dst_dir is None else dst_dir
    utilities.mkdir(target_dir)
    basename = 'data.json' if fn_prefix is None else fn_prefix + '_' + 'data.json'
    with open(target_dir / basename, 'w') as fh:
        json.dump(output, fh)
Example #2
0
# Resolve the project directory layout from the [PATHS] config section,
# falling back to a conventional tree rooted at ``root_dir``.
# NOTE(review): configured values presumably come back from the config parser
# as plain strings, while the fallback defaults are Path objects; coerce
# everything through Path() so the ``/`` joins below (and the raw_dir joins in
# the mkdir section later in this module) work either way.
from pathlib import Path  # harmless if already imported at the top of the file

sys_dir = Path(cfg['PATHS'].get('sys_dir', root_dir / 'sys'))
data_dir = Path(cfg['PATHS'].get('data_dir', sys_dir / 'data'))
raw_dir = Path(cfg['PATHS'].get('raw_dir', data_dir / 'raw'))
interim_dir = Path(cfg['PATHS'].get('interim_dir', data_dir / 'interim'))
processed_dir = Path(cfg['PATHS'].get('processed_dir', data_dir / 'processed'))
metadata_dir = Path(cfg['PATHS'].get('metadata_dir', processed_dir / 'metadata'))
models_dir = Path(cfg['PATHS'].get('models_dir', sys_dir / 'models'))
stanf_nlp_dir = Path(cfg['PATHS'].get('stanf_nlp_dir',
                                      root_dir / 'stanford_nlp_models'))
mln_dir = Path(cfg['PATHS'].get('mln_dir', root_dir / 'mln_models'))

# Get search and filter settings; default to empty lists.
# SECURITY: these values come from an external config file, so parse them with
# ast.literal_eval (accepts Python literals only) instead of eval(), which
# would execute arbitrary expressions embedded in the file. Behavior is
# unchanged for the expected list-literal values and the '[]' defaults.
import ast  # harmless if already imported at the top of the file

terms = ast.literal_eval(cfg['SEARCH'].get('terms', '[]'))
sources = ast.literal_eval(cfg['SEARCH'].get('sources', '[]'))
arxiv_drops = ast.literal_eval(cfg['FILTER'].get('drops', '[]'))

# make data directories if they don't already exist
dirs = [
    data_dir,
    raw_dir,
    interim_dir,
    processed_dir,
    metadata_dir,
    models_dir,
    stanf_nlp_dir,
    mln_dir,
]
# plus one raw-data subdirectory per configured source
dirs.extend(raw_dir / source for source in sources)
for target in dirs:
    utilities.mkdir(target)