def retreiveEntityIds(queries, limitByDistribution):
    """Collect entity IDs for every search query, tagged with the query's label.

    Each query is sent to the ``/search`` endpoint through
    ``elastic_query.sendQuery``. A label that is not yet in the module-level
    ``labelMapping`` list is appended to it, so that list is mutated in place.

    Args:
        queries: Iterable of mappings with a ``'label'`` and a ``'query'``
            entry. ``'query'`` is concatenated verbatim into the request path.
        limitByDistribution: When truthy, the per-query limit is the
            module-level ``limitEntityQuery`` multiplied by the
            ``labelDistributionAdjusted`` entry found at the label's index in
            ``labelMapping`` (truncated with ``int``). Otherwise
            ``limitEntityQuery`` is used unchanged.

    Returns:
        A two-element list ``[entityIds, labelMapping]``; ``entityIds`` is a
        list of ``[entityId, label]`` pairs in the order they were received.
    """
    # NOTE(review): this function is defined a second time directly below in
    # this file; Python binds the name to the later definition.
    logger.info('Retreiving entity IDs...')
    collected = []
    for query in queries:
        logger.debug('Query:')
        logger.debug(query)

        label = query['label']
        if label not in labelMapping:
            labelMapping.append(label)

        if limitByDistribution:
            share = labelDistributionAdjusted[labelMapping.index(label)]
            limit = int(limitEntityQuery * share)
        else:
            limit = limitEntityQuery

        requestPath = '/search?' + query['query'] + '&limit=' + str(limit)
        response = elastic_query.sendQuery(requestPath, True)
        for entity in response['entities']:
            entityId = entity['entityId']
            logger.debug('Received entity ' + str(entityId) + ', label: ' + label + '.')
            collected.append([entityId, label])
    return [collected, labelMapping]
def retreiveEntityIds(queries, limitByDistribution):
    """Run each search query and return the matching entity IDs with labels.

    Queries go to the ``/search`` endpoint via ``elastic_query.sendQuery``.
    Labels missing from the module-level ``labelMapping`` list are appended to
    it as they are encountered (the list is modified in place).

    Args:
        queries: Iterable of mappings offering ``'label'`` and ``'query'``
            keys; the ``'query'`` value is inserted as-is into the request
            path.
        limitByDistribution: If truthy, scale the module-level
            ``limitEntityQuery`` by the ``labelDistributionAdjusted`` entry at
            the label's position in ``labelMapping`` and truncate to ``int``.
            If falsy, use ``limitEntityQuery`` directly.

    Returns:
        ``[entityIds, labelMapping]`` where ``entityIds`` is a list of
        ``[entityId, label]`` pairs in response order.
    """
    # NOTE(review): an earlier definition of this same function sits directly
    # above in this file; this later one is the binding that takes effect.
    logger.info('Retreiving entity IDs...')
    found = []
    for query in queries:
        logger.debug('Query:')
        logger.debug(query)

        label = query['label']
        if label not in labelMapping:
            labelMapping.append(label)

        limit = limitEntityQuery
        if limitByDistribution:
            labelIndex = labelMapping.index(label)
            limit = int(limitEntityQuery * labelDistributionAdjusted[labelIndex])

        searchPath = '/search?' + query['query'] + '&limit=' + str(limit)
        response = elastic_query.sendQuery(searchPath, True)
        for entity in response['entities']:
            entityId = entity['entityId']
            logger.debug('Received entity ' + str(entityId) + ', label: ' + label + '.')
            found.append([entityId, label])
    return [found, labelMapping]
def retreiveImageIds(entityIds):
    """Fetch the images linked to each entity and pair their IDs with its label.

    Every entity is requested from ``/entity/<id>`` through
    ``elastic_query.sendQuery``. Images are taken in response order until the
    module-level ``imagePerEntity`` quota is reached for that entity.

    Args:
        entityIds: Iterable of indexable items where ``entity[0]`` is the
            entity ID and ``entity[1]`` the label (the pair shape returned by
            ``retreiveEntityIds``).

    Returns:
        List of ``[imageId, label]`` pairs.
    """
    logger.info('Retreiving image IDs linked to entities...')
    linkedImages = []
    for entity in entityIds:
        response = elastic_query.sendQuery('/entity/' + str(entity[0]), True)
        taken = 0
        for image in response['images']:
            # Stop once this entity's quota is exhausted.
            # NOTE(review): a quota of 0 collects nothing, although the entity
            # is still queried -- confirm 0 is not meant as "unlimited".
            if imagePerEntity == 0 or taken >= imagePerEntity:
                break
            logger.debug('Received image ' + str(image['imageId']) + ', label: ' + str(entity[1]) + '.')
            linkedImages.append([image['imageId'], entity[1]])
            taken += 1
    return linkedImages
def streamFiles(exportFolder, dictionary, labelMapping):
    """Download images into per-label train/test folders and write index files.

    For every label a ``train/<label>/`` and ``test/<label>/`` folder is
    created below ``exportFolder`` if missing. Each image is then fetched from
    ``/image/<id>`` via ``elastic_query.sendQuery`` and stored as
    ``<id>.jpg``; a line ``"<path> <label index>"`` is appended to
    ``label_info_test.txt`` or ``label_info_training.txt`` respectively.

    When the module-level ``harvestingTest`` flag is not equal to ``False``,
    nothing is downloaded and only the index files are written.

    Args:
        exportFolder: Base output directory (string, concatenated with
            ``'/'``-separated suffixes).
        dictionary: Mapping of image ID to label.
        labelMapping: List of labels; a label's position in this list is the
            numeric index written to the info files.

    Notes:
        * Every 5th processed image (counter 0, 5, 10, ...) goes to the test
          split. Images for which ``sendQuery`` returns ``None`` are logged to
          ``invalid_image_ids.txt`` and do not advance the counter.
        * Image payloads are written in binary mode; they are assumed to be
          raw bytes -- TODO confirm against ``elastic_query.sendQuery``.
    """
    # NOTE(review): this function is defined a second time directly below in
    # this file; Python binds the name to the later definition.
    logger.info('Downloading image files...')
    nthAsTestImage = 5
    invalidLogPath = exportFolder + '/invalid_image_ids.txt'
    trainingFolderPath = exportFolder + '/train/'
    testFolderPath = exportFolder + '/test/'
    trainingInfoPath = exportFolder + '/label_info_training.txt'
    testInfoPath = exportFolder + '/label_info_test.txt'

    # Make sure each label has a folder in both splits.
    for label in labelMapping:
        for baseFolder in (trainingFolderPath, testFolderPath):
            labelFolder = baseFolder + label + '/'
            folderPath = os.path.dirname(labelFolder)
            if not os.path.exists(folderPath):
                os.makedirs(folderPath)
                logger.debug('Created folder: ' + labelFolder)

    # Kept as an explicit ``== False`` comparison so that any other value
    # (e.g. None) still means "skip downloads", exactly as before.
    downloadImages = harvestingTest == False
    if downloadImages:
        logger.info('Downloading images, every ' + str(nthAsTestImage) + 'th is beeing picked as a test image.')
    else:
        logger.info('Skipping image downloads. Just writing index info files.')

    counter = 0
    for imageId, label in dictionary.items():
        imageData = None
        if downloadImages:
            imageData = elastic_query.sendQuery('/image/' + str(imageId), False)
            if imageData is None:
                # Remember IDs that could not be fetched and move on.
                with open(invalidLogPath, 'a') as log:
                    log.write(str(imageId) + '\n')
                continue

        imageFileName = str(imageId) + '.jpg'
        if counter % nthAsTestImage == 0:
            targetPath = testFolderPath + label + '/' + imageFileName
            infoPath = testInfoPath
        else:
            targetPath = trainingFolderPath + label + '/' + imageFileName
            infoPath = trainingInfoPath
        labelInfoString = targetPath + ' ' + str(labelMapping.index(label)) + '\n'

        if downloadImages:
            # Binary mode: text mode would apply newline translation to the
            # image bytes (and rejects bytes outright on Python 3).
            with open(targetPath, 'wb') as out:
                out.write(imageData)
        with open(infoPath, 'a') as info:
            info.write(labelInfoString)
        counter += 1

    logger.info('\nDone.')
def streamFiles(exportFolder, dictionary, labelMapping):
    """Store images in per-label train/test folders and record them in index files.

    A ``train/<label>/`` and a ``test/<label>/`` folder is created under
    ``exportFolder`` for every label that lacks one. Each image is requested
    from ``/image/<id>`` via ``elastic_query.sendQuery``, saved as
    ``<id>.jpg``, and a ``"<path> <label index>"`` line is appended to
    ``label_info_test.txt`` or ``label_info_training.txt``.

    If the module-level ``harvestingTest`` flag is anything other than equal
    to ``False``, downloads are skipped and only the index files are written.

    Args:
        exportFolder: Base output directory as a string; sub-paths are built
            by ``'/'`` concatenation.
        dictionary: Mapping from image ID to its label.
        labelMapping: List of labels; the index of a label in this list is
            the number written next to the image path.

    Notes:
        * The test split receives every 5th processed image (counter values
          0, 5, 10, ...). An image whose ``sendQuery`` result is ``None`` is
          listed in ``invalid_image_ids.txt`` and does not advance the counter.
        * Image data is written in binary mode on the assumption that
          ``sendQuery`` returns raw bytes here -- TODO confirm.
    """
    # NOTE(review): an earlier definition of this same function sits directly
    # above in this file; this later one is the binding that takes effect.
    logger.info('Downloading image files...')
    nthAsTestImage = 5
    invalidLogPath = exportFolder + '/invalid_image_ids.txt'
    trainingFolderPath = exportFolder + '/train/'
    testFolderPath = exportFolder + '/test/'
    trainingInfoPath = exportFolder + '/label_info_training.txt'
    testInfoPath = exportFolder + '/label_info_test.txt'

    # Create the missing label folders for both splits (training first).
    for label in labelMapping:
        for splitFolder in (trainingFolderPath, testFolderPath):
            labelFolder = splitFolder + label + '/'
            folderPath = os.path.dirname(labelFolder)
            if not os.path.exists(folderPath):
                os.makedirs(folderPath)
                logger.debug('Created folder: ' + labelFolder)

    # Explicit ``== False`` is intentional: values such as None must keep
    # meaning "do not download", as in the previous implementation.
    downloadImages = harvestingTest == False
    if downloadImages:
        logger.info('Downloading images, every ' + str(nthAsTestImage) + 'th is beeing picked as a test image.')
    else:
        logger.info('Skipping image downloads. Just writing index info files.')

    counter = 0
    for imageId, label in dictionary.items():
        imageData = None
        if downloadImages:
            imageData = elastic_query.sendQuery('/image/' + str(imageId), False)
            if imageData is None:
                # Keep a record of IDs that could not be fetched, then skip.
                with open(invalidLogPath, 'a') as log:
                    log.write(str(imageId) + '\n')
                continue

        imageFileName = str(imageId) + '.jpg'
        isTestImage = counter % nthAsTestImage == 0
        if isTestImage:
            targetPath = testFolderPath + label + '/' + imageFileName
            infoPath = testInfoPath
        else:
            targetPath = trainingFolderPath + label + '/' + imageFileName
            infoPath = trainingInfoPath
        labelInfoString = targetPath + ' ' + str(labelMapping.index(label)) + '\n'

        if downloadImages:
            # Images are binary data; text mode would translate newlines in
            # the payload and refuses bytes on Python 3.
            with open(targetPath, 'wb') as out:
                out.write(imageData)
        with open(infoPath, 'a') as info:
            info.write(labelInfoString)
        counter += 1

    logger.info('\nDone.')