def prepare_features(config):
    """Load and prepare the molecule features, either conventional or spectral.

    Parameters
    ----------
    config : dict
        Expects ``config['data_path']`` and a ``config['features']`` dict with
        keys ``type`` ('conventional' or 'spectral'), ``descriptor``,
        ``properties_to_add``, ``normalize`` and — for spectral features —
        ``spec_type``, ``use_intensity``, ``kernel_width``, ``bin_width``.

    Returns
    -------
    dict
        Mapping of molecule id -> feature vector, after invalid-feature
        removal, optional property augmentation and optional normalization.

    Raises
    ------
    ValueError
        If ``config['features']['type']`` is not a recognized feature type.
    """
    if config['features']['type'] == 'conventional':
        features_path = os.path.join(config['data_path'], 'conventional_features')
        if config['features']['descriptor'] == 'all':
            # Concatenate every available descriptor CSV into one long
            # feature vector per molecule.
            all_features = defaultdict(lambda: np.array([]))
            feature_files = glob.glob(os.path.join(features_path, '*.csv'))
            for feature_file in feature_files:
                # haddad/saito files are deliberately excluded from 'all'.
                if 'haddad' in feature_file or 'saito' in feature_file:
                    continue
                features = flib.read_feature_csv(feature_file)
                for key, value in features.items():
                    all_features[key] = np.hstack((all_features[key], value))
            features = all_features
            # Drop molecules that are missing entries in one or more
            # descriptor files (their concatenated vector is shorter).
            max_len = max(len(val) for val in features.values())
            for key in list(features.keys()):
                if len(features[key]) < max_len:
                    del features[key]
                    print('deleted a molecule because not all features available')
        else:
            feature_file = os.path.join(config['data_path'],
                                        'conventional_features',
                                        config['features']['descriptor'] + '.csv')
            features = flib.read_feature_csv(feature_file)
    elif config['features']['type'] == 'spectral':
        feature_file = os.path.join(config['data_path'], 'spectral_features',
                                    'large_base', 'parsed.pckl')
        # BUGFIX: open the pickle in binary mode and close the handle
        # (the original leaked a text-mode file object).
        with open(feature_file, 'rb') as f:
            spectra = pickle.load(f)
        features = flib.get_spectral_features(
            spectra,
            spec_type=config['features']['spec_type'],
            use_intensity=config['features']['use_intensity'],
            kernel_widths=config['features']['kernel_width'],
            bin_width=config['features']['bin_width'])
    else:
        # Fail loudly instead of hitting a NameError on `features` below.
        raise ValueError('unknown feature type: %r' % config['features']['type'])
    features = flib.remove_invalid_features(features)
    if config['features']['properties_to_add']:
        flib.add_molecule_properties(features, config['features']['properties_to_add'])
    if config['features']['normalize']:
        features = flib.normalize_features(features)
    return features
def setUp(self):
    """Locate the test data directory and load the feature CSV fixture."""
    base_dir = os.path.dirname(__file__)
    self.tmp_path = os.path.join(base_dir, 'data')
    fixture_csv = os.path.join(self.tmp_path, 'features', 'test_features.csv')
    self.features = flib.read_feature_csv(fixture_csv)