def test_determine_feats_used():
    """Test determine_feats_used.

    Copies the fixture files ``test_features.csv`` and ``test_classes.npy``
    from ``DATA_PATH`` into ``cfg.FEATURES_FOLDER`` under the ``TEST001_``
    prefix, checks the feature names reported by
    ``pred.determine_feats_used("TEST001")``, and removes the copies again.
    """
    copied_paths = []
    try:
        for suffix in ["features.csv", "classes.npy"]:
            dest = pjoin(cfg.FEATURES_FOLDER, "TEST001_%s" % suffix)
            shutil.copy(pjoin(DATA_PATH, "test_%s" % suffix), dest)
            copied_paths.append(dest)
        feats_used = pred.determine_feats_used("TEST001")
        npt.assert_array_equal(
            feats_used,
            ["meta1", "meta2", "meta3", "std_err", "amplitude"])
    finally:
        # Clean up even when the assertion fails, so a failing run does not
        # leave TEST001_* files behind in the features folder. Only paths
        # that were actually copied are removed.
        for path in copied_paths:
            os.remove(path)
def pred_featurize_reduce(iter, params):
    """Generate features as reduce step in Disco's map-reduce.

    Generator. Implementation of reduce stage in map-reduce process, for
    model prediction feature generation of time series data. This function
    is never directly called, but rather passed as a parameter to the
    Disco `Job()` object's `run()` method.

    Each file name is resolved, in order, against: the name as given,
    ``params["tmp_dir_path"]``, and the ``unzipped`` sub-directory of
    ``cfg.UPLOAD_FOLDER``. Names that match no regular file are reported on
    stdout and skipped. After featurization the resolved file is deleted;
    a failure to delete is printed and otherwise ignored.

    Parameters
    ----------
    iter : iterable
        Iterable of tuples each containing the file name of a time
        series data file to be used for featurization and an unused
        placeholder string.
    params : dict
        Dictionary of parameters for use in map-reduce process. The keys
        read here are ``'featset_key'``, ``'custom_features_script'``,
        ``'meta_features'`` and, only when a file is not found under the
        name as given, ``'tmp_dir_path'``.

    Yields
    ------
    tuple
        A two-element tuple containing the file name of the time
        series data set as its first element, and a two-element list
        containing the extracted features (dict) and the original
        time series data (list of lists) as its second element.

    """
    featset_key = params['featset_key']
    custom_features_script = params['custom_features_script']
    meta_features = params['meta_features']

    # NOTE(review): imports are kept function-local as in the original,
    # presumably so the function is self-contained when Disco runs it on a
    # worker -- confirm before moving them to module level.
    import os
    from mltsp import cfg
    from mltsp import predict_class as pred
    import ntpath
    from disco.util import kvgroup

    url_scheme = "file://"
    for fname, _ in kvgroup(sorted(iter)):
        # Strip only the leading scheme; a later "file://" inside the name
        # is part of the path and must be left alone.
        if fname.startswith(url_scheme):
            fname = fname[len(url_scheme):]

        if os.path.isfile(fname):
            fpath = fname
        elif os.path.isfile(os.path.join(params["tmp_dir_path"], fname)):
            fpath = os.path.join(params["tmp_dir_path"], fname)
        elif os.path.isfile(
                os.path.join(cfg.UPLOAD_FOLDER, "unzipped", fname)):
            fpath = os.path.join(cfg.UPLOAD_FOLDER, "unzipped", fname)
        else:
            # Not found anywhere: report what was looked for and move on to
            # the next file instead of aborting the whole reduce step.
            upload_path = os.path.join(cfg.UPLOAD_FOLDER, fname)
            shown_path = fname if cfg.UPLOAD_FOLDER in fname else upload_path
            print(shown_path + " is not a file...")
            if os.path.exists(upload_path) or os.path.exists(fname):
                print("But it does exist on the disk.")
            else:
                print("and in fact it doesn't even exist.")
            continue

        # NOTE(review): this depends only on featset_key; it is left inside
        # the loop because it is not visible here whether featurize_single
        # mutates the returned value.
        features_to_use = pred.determine_feats_used(featset_key)
        big_feats_and_tsdata_dict = pred.featurize_single(
            fpath, features_to_use, custom_features_script, meta_features)

        # Best-effort removal of the processed input file.
        try:
            os.remove(fpath)
        except OSError as e:
            print(e)

        # Results are keyed by the base file name up to the first "$".
        short_fname = ntpath.basename(fpath).split("$")[0]
        file_results = big_feats_and_tsdata_dict[short_fname]
        all_features = file_results["features_dict"]
        ts_data = file_results["ts_data"]
        yield short_fname, [all_features, ts_data]