def load_engine(sdf_files, feature_matrix, dimension, projection_count=10):
    """
    Convert each path in sdf_files into an SDF instance, attach its feature
    vector, and add it to a newly created nearpy Engine.

    Parameters
        sdf_files: a list of sdf file pathnames relative to the current
            directory. Intended to be fed in from `find_sdf(root_dir)`
        feature_matrix: matrix of training data features; feature_matrix[i]
            is used as the feature vector for sdf_files[i], so the two must
            be aligned by position
        dimension: dimensionality of the feature vectors used for LSH
            (here: number of cluster centers)
        projection_count: second argument passed to RandomBinaryProjections
            (presumably the number of random projections per hash -- confirm
            against the nearpy documentation). Defaults to 10, the value
            that was previously hard-coded.

    Returns
        engine: the nearpy Engine after every file in sdf_files has been
            passed to SDF.add_to_nearpy_engine

    Sample Usage
        >>> engine = load_engine(sdf_files, feature_matrix, dimension)
    """
    rbp = RandomBinaryProjections('rbp', projection_count)
    engine = Engine(dimension, lshashes=[rbp])

    for index, file_ in enumerate(sdf_files):
        # Progress report every 100 files; index equals the number of files
        # converted so far. The single parenthesized argument keeps this
        # valid as both a Python 2 print statement and a Python 3 call.
        if index % 100 == 0:
            print('Converted %d files' % index)
        converted = SDF(file_)
        converted.set_feature_vector(feature_matrix[index])
        converted.add_to_nearpy_engine(engine)

    return engine
def load_engine(sdf_files, feature_matrix, dimension, projection_count=10):
    """
    Convert each path in sdf_files into an SDF instance, attach its feature
    vector, and add it to a newly created nearpy Engine.

    Parameters
        sdf_files: a list of sdf file pathnames relative to the current
            directory. Intended to be fed in from `find_sdf(root_dir)`
        feature_matrix: matrix of training data features; feature_matrix[i]
            is used as the feature vector for sdf_files[i], so the two must
            be aligned by position
        dimension: dimensionality of the feature vectors used for LSH
            (here: number of cluster centers)
        projection_count: second argument passed to RandomBinaryProjections
            (presumably the number of random projections per hash -- confirm
            against the nearpy documentation). Defaults to 10, the value
            that was previously hard-coded.

    Returns
        engine: the nearpy Engine after every file in sdf_files has been
            passed to SDF.add_to_nearpy_engine

    Sample Usage
        >>> engine = load_engine(sdf_files, feature_matrix, dimension)
    """
    rbp = RandomBinaryProjections('rbp', projection_count)
    engine = Engine(dimension, lshashes=[rbp])

    for index, file_ in enumerate(sdf_files):
        # Progress report every 100 files; index equals the number of files
        # converted so far. The single parenthesized argument keeps this
        # valid as both a Python 2 print statement and a Python 3 call.
        if index % 100 == 0:
            print('Converted %d files' % index)
        converted = SDF(file_)
        converted.set_feature_vector(feature_matrix[index])
        converted.add_to_nearpy_engine(engine)

    return engine