Beispiel #1
0
def get_diff_filelist(feature_filename,
                      filelist,
                      feature_types,
                      useHDF5=False,
                      verbose=True):
    """Return the entries of ``filelist`` that are not in existing feature files.

    The name of the feature file to probe is built as
    ``feature_filename + '.' + feature_types[0]`` (plus ``'.h5'`` when
    ``useHDF5`` is true). Only this first feature type is checked; the
    files for the remaining types are assumed to exist alongside it.

    Args:
        feature_filename: Path stub of the feature file(s), without the
            feature-type extension.
        filelist: Sized collection of hashable file identifiers that are
            candidates for feature extraction.
        feature_types: Sequence of feature-type extensions; must contain at
            least one entry because element 0 is used for the existence check.
        useHDF5: If true, previous ids are read with
            ``load_multiple_hdf5_feature_files``; otherwise with
            ``read_csv_features``.
        verbose: If true, print a warning when no previous feature file is
            found and a short summary otherwise.

    Returns:
        ``filelist`` itself (same object, unchanged) when the probed feature
        file does not exist. Otherwise a new list holding every entry of
        ``filelist`` that is not among the previously stored ids, without
        duplicates and in the order of first appearance in ``filelist``.
    """
    # Probe just the first feature type, assuming the others will exist too.
    check_filename = feature_filename + '.' + feature_types[0]
    if useHDF5:
        check_filename += '.h5'

    if not os.path.isfile(check_filename):
        if verbose:
            print("WARNING: No previous feature file " + check_filename +
                  " found. Will create new feature files.")
        return filelist  # nothing analyzed before: hand back the input as is

    # NOTE(review): both loaders are assumed to return a sized iterable of
    # ids when called with ids_only=True -- confirm against their definitions.
    if useHDF5:
        filelist_previous = load_multiple_hdf5_feature_files(feature_filename,
                                                             feature_types,
                                                             verbose=verbose,
                                                             ids_only=True)
    else:
        filelist_previous = read_csv_features(feature_filename,
                                              feature_types,
                                              ids_only=True,
                                              single_id_list=True)

    # A plain set difference would return the new files in arbitrary order
    # (varying between runs for strings). Walk the input instead so that the
    # result is deterministic; adding each emitted entry to the set keeps the
    # de-duplication that the set difference provided.
    excluded = set(filelist_previous)
    filelist_diff = []
    for entry in filelist:
        if entry not in excluded:
            excluded.add(entry)
            filelist_diff.append(entry)

    if verbose:
        print("Filelist has", len(filelist), "entries, found",
              len(filelist_previous),
              "previously analyzed files in feature file(s).")
        print("Analyzing only", len(filelist_diff), "new files.")

    return filelist_diff
Beispiel #2
0
def read_feature_files(filenamestub, ext, separate_ids=True, id_column=0):
    """Thin wrapper that forwards to ``rp_feature_io.read_csv_features``.

    The four arguments are passed on positionally, in the order given here,
    and whatever ``read_csv_features`` returns is returned unchanged.
    NOTE(review): the meaning of each argument is defined by
    ``read_csv_features`` and is not visible from this wrapper.
    """
    # Imported at call time, so rp_feature_io is only needed when this runs.
    from rp_feature_io import read_csv_features as _read_csv

    forwarded = (filenamestub, ext, separate_ids, id_column)
    return _read_csv(*forwarded)
Beispiel #3
0
def read_feature_files(filenamestub, ext, separate_ids=True, id_column=0):
    """Delegate reading of feature files to ``rp_feature_io.read_csv_features``.

    ``filenamestub``, ``ext``, ``separate_ids`` and ``id_column`` are handed
    over positionally in that order; the delegate's result is returned as is.
    NOTE(review): argument semantics are those of ``read_csv_features`` and
    should be checked there.
    """
    # Function-level import: the dependency is resolved on each call rather
    # than when the enclosing module is loaded.
    from rp_feature_io import read_csv_features

    features = read_csv_features(
        filenamestub,
        ext,
        separate_ids,
        id_column,
    )
    return features