예제 #1
0
def loadOrTrainLSHModel(forceGenerate=False):
    """Return the LSH model, loading it from the pickle cache or training it.

    When ``./pickle_files/lshModel.pickle`` exists and ``forceGenerate`` is
    false, the pickled object is loaded and returned unchanged. Otherwise a
    new ``LSH`` instance is trained frame by frame on the data returned by
    ``generateData(loadAllFiles("train", ''))`` and then pickled to the same
    path so later calls can reuse it.

    Args:
        forceGenerate: If true, ignore any cached pickle and retrain.

    Returns:
        The unpickled or freshly trained model.
    """
    modelPath = "./pickle_files/lshModel.pickle"

    if os.path.exists(modelPath) and not forceGenerate:
        print("LSH model found on disk")
        # NOTE(review): pickle.load can execute arbitrary code stored in the
        # file; this is only safe for cache files this program wrote itself.
        with open(modelPath, "rb") as pickleIn:
            return pickle.load(pickleIn)

    print("Training LSH model")
    trainAudioDataAndRateArray = loadAllFiles("train", '')
    trainingData = generateData(trainAudioDataAndRateArray)

    lshModel = LSH()
    for data in trainingData:
        # data[0] is handed to extractValidFrames; data[1] is used as the
        # file name in the log line and in the per-frame label.
        print("fileName", data[1])
        validFrameList = extractValidFrames(data[0])

        for i in range(validFrameList.shape[0]):
            # Each frame is trained as a single row (1 x N).
            reshapedValidFrame = validFrameList[i].reshape(1, -1)
            lshModel.train(reshapedValidFrame, {
                "name": data[1] + "_" + str(i),
                "frameIndex": i
            })

    # Create the cache directory up front so a missing folder cannot throw
    # away the freshly trained model at write time.
    os.makedirs(os.path.dirname(modelPath), exist_ok=True)
    with open(modelPath, "wb") as pickleOut:
        pickle.dump(lshModel, pickleOut)

    return lshModel
예제 #2
0
파일: nn.py 프로젝트: mikaelbaymani/nn
    # Command-line dispatch: each recognised flag present in sys.argv is
    # handled in turn, so several actions can be combined in one invocation.
    if "-h" in sys.argv or "--help" in sys.argv:

        print(
            "Usage: ./nn.py [OPTION]                                    \n\n"
            "   -h | --help        Show this help message and exit        \n"
            "   --fetch <plugin>   Fetch new data with proprietary plugin \n"
            "   --train            Train LSH model                        \n"
            "   --query <ID>       Nearest Neighbor query                 \n")
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to be defined (e.g. under `python -S`).
        sys.exit(0)

    if "--fetch" in sys.argv:

        # The plugin name is the token right after the flag, wherever the
        # flag sits on the command line; any ".py" substring is removed.
        # A missing value still raises IndexError, as before.
        pluginName = sys.argv[sys.argv.index("--fetch") + 1].replace('.py', '')
        Dimport(pluginName, pluginName,
                FULLNAME('plugins'))(CONST.DATASET)

    if "--train" in sys.argv:

        # Vectorise the 'content' column of the dataset with TF-IDF and
        # train the LSH model on the resulting matrix.
        dataframe = pd.read_csv(CONST.DATASET)
        corpus = TfidfVectorizer().fit_transform(dataframe['content'])

        lsh = LSH(corpus)
        model = lsh.train()

        # Context manager so the model file is flushed and closed even if
        # pickling fails part-way through.
        with open(CONST.MODEL, 'wb') as modelFile:
            pickle.dump(model, modelFile)

    if "--query" in sys.argv:
        # The query ID is the token right after the flag.
        print(query(sys.argv[sys.argv.index("--query") + 1]))

# eof