def calculateRequiredNCDs(featureFileDict, method, dimension, timeDelay,
                          neighbourhoodSize, downSampleFactor, sequenceLength,
                          featureName):
    '''
    Build the NCDprops objects for the pairings of the given feature files.

    Inputs:
        :featureFileDict: a mapping from a feature file id to an object exposing
                          the attributes filePath and pieceId
        :method, dimension, timeDelay, neighbourhoodSize, downSampleFactor,
         sequenceLength, featureName: passed unchanged to every NCDprops created

    Returns:
        a list of NCDprops, one per pairing, in the iteration order of the keys
        of featureFileDict

    Each file is paired with itself and with every file that follows it, except
    that the last file is never used as the first element of a pairing - so no
    self-pairing is produced for the last file, and a dictionary with fewer than
    two entries yields an empty list.
    '''
    pairings = []
    performanceIds = list(featureFileDict.keys())
    numPerformances = len(performanceIds)
    print('Creating list of required NCDs...')

    # The last id is deliberately left out of the "first" role (see docstring)
    for firstIdx, firstPfId in enumerate(performanceIds[:-1]):
        firstFile = featureFileDict[firstPfId]
        firstPath = firstFile.filePath
        firstPieceId = firstFile.pieceId

        # Partners start at the file itself and run to the end of the id list
        for secondPfId in performanceIds[firstIdx:numPerformances]:
            secondFile = featureFileDict[secondPfId]
            secondPath = secondFile.filePath
            secondPieceId = secondFile.pieceId
            pairings.append(NCDprops(firstPieceId, firstPfId,
                                     secondPieceId, secondPfId,
                                     method, dimension, timeDelay,
                                     neighbourhoodSize, downSampleFactor,
                                     sequenceLength, featureName,
                                     firstPath, secondPath))

    print('Number of NCD files required for combination: %i' % len(pairings))
    return pairings
def createNCDfiles(existingNCDs, processPool, featureName, downSampleFactor,
                   timeDelay, dimension, method, neighbourhoodSize,
                   numFilesPerFolder, sequenceLength, weightMatrix=None,
                   biases=None, featureOffset=0.0, featureScaling=1.0):
    '''
    Create every NCD file that does not exist yet for the pairings of feature
    files found under the root folder of the given feature.

    The function (1) lists the feature files, (2) works out which NCD files are
    missing, (3) creates the CRP files those NCDs need, (4) loads the CRPs into
    memory, (5) creates the NCD files in batches through the process pool and
    (6) removes the CRP files again.

    Inputs:
        :existingNCDs: a list of existing NCD files in order to avoid duplication
                       (may be None; it is then passed on to NCDexists() as None)
        :processPool: a pool of multiprocessing processes to use for running the script
        :featureName: the name of the feature e.g 'chroma', 'mfcc'
        :downSampleFactor: the factor to use in downsampling the original signals
                           before creating CRPs
        :timeDelay: the time delay to use in creating the CRPs
        :dimension: passed unchanged to NCDprops (presumably the embedding
                    dimension used in creating the CRPs - TODO confirm)
        :method: the method to use in creating the CRPs
        :neighbourhoodSize: the neighbourhood size to use in creating the CRPs
        :numFilesPerFolder: the number of performances of each piece to use -
                            set to None to use all performances
        :sequenceLength: fixed sequence length to normalise CRPs to
                         (use 'var' for variable length)
        :weightMatrix: a matrix of weights (inputFeatureLength rows x
                       outputFeatureLength columns) to transform the input
                       feature files with before calculating the CRPs
        :biases, featureOffset, featureScaling: stored, together with
                       weightMatrix, on every CRP that has to be created

    Only the first 20 folders returned by getFolderNames() are processed.

    Each feature file is paired with itself and with every file that follows it,
    except that the last file is never used as the first element of a pairing.
    '''
    import sys  # local import: only needed for the in-place progress counter

    ncdBatchSize = 100  # number of NCD jobs handed to the pool per map() call

    mazurkasPath = FeatureFileProps.rootPath[featureName]
    mazurkaIds = getFolderNames(mazurkasPath, True)[:20]
    if existingNCDs is not None:
        existingNCDs = set(existingNCDs)  # makes checking faster

    # Get performances from folders
    featureFileDict = getFeatureFileDictAllFolders(mazurkasPath, mazurkaIds,
                                                   featureName, numFilesPerFolder)

    # Create list of required NCD files
    requiredNCDs = []
    # list() keeps the ids indexable when keys() returns a view instead of a list
    featureFileIds = list(featureFileDict.keys())
    numFeatureFiles = len(featureFileIds)
    print('Checking for existing NCD files...')
    for f1 in range(numFeatureFiles - 1):
        pc1pfId = featureFileIds[f1]
        featureFile1 = featureFileDict[pc1pfId]
        featureFilePath1 = featureFile1.filePath
        pc1Id = featureFile1.pieceId
        for f2 in range(f1, numFeatureFiles):
            pc2pfId = featureFileIds[f2]
            featureFile2 = featureFileDict[pc2pfId]
            featureFilePath2 = featureFile2.filePath
            pc2Id = featureFile2.pieceId
            ncdProps = NCDprops(pc1Id, pc1pfId, pc2Id, pc2pfId, method,
                                dimension, timeDelay, neighbourhoodSize,
                                downSampleFactor, sequenceLength, featureName,
                                featureFilePath1, featureFilePath2)
            if not NCDexists(ncdProps.getFileName(), existingNCDs=existingNCDs):
                requiredNCDs.append(ncdProps)
    print('Number of NCD files missing for combination: %i' % len(requiredNCDs))

    if not requiredNCDs:
        # Nothing is missing, so no CRPs have to be created, loaded or removed
        return

    # Create CRP files and save to the CRPs folder
    print('Calculating # of required CRP files')
    requiredCRPs = []  # CRPs that have no file yet
    sourceCRPs = []    # every CRP used by a missing NCD
    for requiredNCD in requiredNCDs:
        crp1 = requiredNCD.getCRP1()
        crp2 = requiredNCD.getCRP2()
        sourceCRPs.append(crp1)
        sourceCRPs.append(crp2)
        if not crp1.hasExistingFile():
            requiredCRPs.append(crp1)
        if not crp2.hasExistingFile():
            requiredCRPs.append(crp2)
    requiredCRPs = CRPprops.uniqueCRPprops(requiredCRPs)
    sourceCRPs = CRPprops.uniqueCRPprops(sourceCRPs)

    numRequiredCRPs = len(requiredCRPs)
    print('Creating %i required CRP files' % numRequiredCRPs)
    if numRequiredCRPs > 0:
        CRPargList = []
        for crp in requiredCRPs:
            # The transform settings travel with the CRP object to the worker
            crp.weightMatrix = weightMatrix
            crp.biases = biases
            crp.featureOffset = featureOffset
            crp.featureScaling = featureScaling
            CRPargList.append((crp, ))
        processPool.map(multi_createCRPfile, CRPargList)

    # Load CRP files into memory
    print('Loading %i CRP files' % len(sourceCRPs))
    CRPfiles = loadCRPfiles(sourceCRPs)

    # Create NCD files
    numNCDs = len(requiredNCDs)
    print('Creating %i NCD files' % numNCDs)
    numSkipped = 0
    for batchStart in range(0, numNCDs, ncdBatchSize):
        batchEnd = min(batchStart + ncdBatchSize, numNCDs)
        NCDargList = []
        for requiredNCD in requiredNCDs[batchStart:batchEnd]:
            NCDfn = requiredNCD.getFileName()
            CRPtuple1 = requiredNCD.getCRP1().toTuple(False)
            CRPtuple2 = requiredNCD.getCRP2().toTuple(False)
            try:
                NCDargList.append((NCDfn, CRPfiles[CRPtuple1], CRPfiles[CRPtuple2]))
            except LookupError:
                # A CRP is missing from the loaded set: skip this NCD, but keep
                # count so the omission is reported instead of going unnoticed
                numSkipped += 1
        if NCDargList:
            processPool.map(multi_createNCDfile, NCDargList)
        # Progress counter rewritten in place; clamped so it never exceeds the total
        sys.stdout.write('\r%i...' % batchEnd)
        sys.stdout.flush()
    sys.stdout.write('\n')
    if numSkipped:
        print('Skipped %i NCD files because a CRP was not loaded' % numSkipped)

    # Delete CRP files
    # NOTE(review): cleanup is only reached when NCD files were missing; confirm
    # that stale CRP files need not be removed when nothing had to be created
    print('Deleting CRP files')
    for CRPfilename in getFileNames(CRPpath, '.npy', True):
        try:
            os.remove(CRPpath + CRPfilename)
        except OSError:
            # Best-effort cleanup: a file that cannot be removed is left behind
            pass
def multi_createNCDfile(args):
    '''
    Single-argument wrapper around NCDprops.createNCDfile().

    Inputs:
        :args: a sequence of positional arguments; it is unpacked and forwarded
               to NCDprops.createNCDfile()

    Returns:
        whatever NCDprops.createNCDfile() returns
    '''
    createNCDfile = NCDprops.createNCDfile
    return createNCDfile(*args)