Esempio n. 1
0
def load_engine(sdf_files, feature_matrix, dimension, projection_count=10):
    """
    Convert each of the given sdf_files into an SDF instance, attach its feature vector, and load it into a nearpy Engine.

    Parameters
        sdf_files: a list of sdf_files with their pathname from the current directory. Intended to be fed in from `find_sdf(root_dir)`
        feature_matrix: matrix of training data features to be loaded into engine; feature_matrix[i] is used for sdf_files[i], so it must be indexable for every file position
        dimension: dimensionality of the feature vectors used for LSH (here: number of cluster centers)
        projection_count: second argument passed to RandomBinaryProjections (defaults to 10, the value that used to be hard-coded)

    Returns
        engine: instance of a nearpy engine with all of sdf_files loaded

    Sample Usage
        >>> engine = load_engine(sdf_files, feature_matrix, dimension)
    """
    rbp = RandomBinaryProjections('rbp', projection_count)
    engine = Engine(dimension, lshashes=[rbp])

    for index, file_ in enumerate(sdf_files):
        # Progress report every 100 files; index equals the number of files
        # already converted, so no separate counter is needed.
        if index % 100 == 0:
            # A single parenthesised argument prints the same text under
            # both Python 2 and Python 3.
            print('Converted %d files' % index)
        converted = SDF(file_)
        converted.set_feature_vector(feature_matrix[index])
        converted.add_to_nearpy_engine(engine)
    return engine
Esempio n. 2
0
def load_engine(sdf_files, feature_matrix, dimension, projection_count=10):
    """
    Convert each of the given sdf_files into an SDF instance, attach its feature vector, and load it into a nearpy Engine.

    Parameters
        sdf_files: a list of sdf_files with their pathname from the current directory. Intended to be fed in from `find_sdf(root_dir)`
        feature_matrix: matrix of training data features to be loaded into engine; feature_matrix[i] is used for sdf_files[i], so it must be indexable for every file position
        dimension: dimensionality of the feature vectors used for LSH (here: number of cluster centers)
        projection_count: second argument passed to RandomBinaryProjections (defaults to 10, the value that used to be hard-coded)

    Returns
        engine: instance of a nearpy engine with all of sdf_files loaded

    Sample Usage
        >>> engine = load_engine(sdf_files, feature_matrix, dimension)
    """
    rbp = RandomBinaryProjections('rbp', projection_count)
    engine = Engine(dimension, lshashes=[rbp])

    for index, file_ in enumerate(sdf_files):
        # Progress report every 100 files; index equals the number of files
        # already converted, so no separate counter is needed.
        if index % 100 == 0:
            # A single parenthesised argument prints the same text under
            # both Python 2 and Python 3.
            print('Converted %d files' % index)
        converted = SDF(file_)
        converted.set_feature_vector(feature_matrix[index])
        converted.add_to_nearpy_engine(engine)
    return engine