def get_diff_filelist(feature_filename, filelist, feature_types, useHDF5=False, verbose=True):
    '''Return the entries of filelist for which no features have been extracted yet.

    Checks whether a feature file from a previous run exists and, if so, reads
    the ids stored in it and removes them from filelist.

    Parameters:
        feature_filename: path stub of the feature files; the feature type
            (and '.h5' when useHDF5 is set) is appended to form the file name.
        filelist: sized iterable of hashable file ids that should be analyzed.
        feature_types: sequence of feature type names; only the first one is
            used to probe for an existing feature file.
        useHDF5: if True, look for / read HDF5 feature files instead of CSV.
        verbose: if True, print a warning or a short summary.

    Returns:
        filelist itself (same object, untouched) when no previous feature file
        exists; otherwise a new list holding the entries of filelist that are
        not among the previously stored ids, in their original order and with
        repeated entries reported only once.
    '''
    # check just the first feature type, assuming the others will exist too
    check_filename = feature_filename + '.' + feature_types[0]
    if useHDF5:
        check_filename += '.h5'

    # nothing was extracted before: every file still has to be analyzed
    if not os.path.isfile(check_filename):
        if verbose:
            print("WARNING: No previous feature file " + check_filename + " found. Will create new feature files.")
        return filelist  # unchanged, as is

    # only the ids are requested from the readers (ids_only=True);
    # presumably they return a flat iterable of file ids -- verify against the readers
    if not useHDF5:
        filelist_previous = read_csv_features(feature_filename, feature_types, ids_only=True, single_id_list=True)
    else:
        filelist_previous = load_multiple_hdf5_feature_files(feature_filename, feature_types, verbose=verbose, ids_only=True)

    # Order-preserving difference: a plain set difference would return the
    # remaining files in arbitrary (run-dependent) order. Adding every emitted
    # entry to the set also drops repeated entries, as the set difference did.
    seen = set(filelist_previous)
    filelist_diff = []
    for entry in filelist:
        if entry not in seen:
            seen.add(entry)
            filelist_diff.append(entry)

    if verbose:
        print("Filelist has", len(filelist), "entries, found", len(filelist_previous), "previously analyzed files in feature file(s).")
        print("Analyzing only", len(filelist_diff), "new files.")

    return filelist_diff
def read_feature_files(filenamestub, ext, separate_ids=True, id_column=0):
    '''Forward the call to rp_feature_io.read_csv_features and return its result.

    All four arguments are passed on positionally and unchanged; see
    read_csv_features for their meaning and for the shape of the return value.

    NOTE(review): an identical definition of read_feature_files follows
    directly after this one, so this definition is shadowed at import time.
    '''
    # imported at call time rather than at module level
    from rp_feature_io import read_csv_features as _read_csv

    features = _read_csv(filenamestub, ext, separate_ids, id_column)
    return features
def read_feature_files(filenamestub, ext, separate_ids=True, id_column=0):
    '''Delegate to rp_feature_io.read_csv_features with the given arguments.

    The arguments are handed over positionally, in the order
    (filenamestub, ext, separate_ids, id_column), and whatever
    read_csv_features returns is returned unchanged.
    '''
    # NOTE(review): this repeats the identical read_feature_files defined just
    # before it; being the later definition, it is the one bound to the name.
    # Consider removing one of the two.
    from rp_feature_io import read_csv_features as csv_reader  # resolved on each call

    forwarded_args = (filenamestub, ext, separate_ids, id_column)
    return csv_reader(*forwarded_args)