def getImages(params):
    """Print the stored image for every label row matching each keyword.

    ``params`` is printed as-is, then each of its items is stemmed with
    ``Stemmer.stem`` and looked up in the module-level ``kv_labels`` store.
    The first field of every returned row is used as the key into
    ``kv_images`` and the lookup result is printed.  Both stores are closed
    before the function returns.
    """
    print(params)
    for keyword in params:
        rows = kv_labels.getAll(Stemmer.stem(keyword))
        for row in rows:
            print(kv_images.get(row[0]))

    # Close KeyValues Storages
    kv_labels.close()
    kv_images.close()
def store(num_imag):
    """Load images and their labels from the Turtle dumps into DynamoDB.

    Args:
        num_imag: Number of ``depiction`` triples to store; anything accepted
            by ``int()``.

    The ``images`` and ``labels`` tables are created through
    ``createImagesTable`` when ``describe_table`` reports them missing.
    Items are written with ``put(table, key, sort, value)``; the sort value
    is a per-key counter kept in a local dict, so a key that appears again
    gets the next sort value instead of reusing the previous one.
    """
    # Loading images and labels from file
    images = ParseTripe.ParseTriples("images.ttl")
    labels = ParseTripe.ParseTriples("labels_en.ttl")

    # Make sure both tables exist before anything is written.
    for table_name in ("images", "labels"):
        try:
            dynamodbClient.describe_table(TableName=table_name)
        except dynamodbClient.exceptions.ResourceNotFoundException:
            createImagesTable(table_name)

    tempImages = {}
    num = int(num_imag)
    while num > 0:
        line = images.getNext()
        if not line:
            # presumably getNext() returns a falsy value once the file is
            # exhausted (the label loop below relies on the same thing), so
            # stop instead of indexing into it -- TODO confirm.
            break
        if "depiction" in line[1]:
            # Count occurrences in tempImages itself; the previous check
            # looked the category up in the table description, so the
            # counter never advanced past 1.
            tempImages[line[0]] = tempImages.get(line[0], 0) + 1
            put('images', line[0], str(tempImages[line[0]]), line[2])
            # Only stored images count towards num_imag.
            num -= 1

    tag = {}
    label = labels.getNext()
    print("creating labels")
    while label:
        # Filtering on `label[0] in tempImages` stays disabled, as before.
        if label[1] == "http://www.w3.org/2000/01/rdf-schema#label":
            # split() without an argument drops the empty tokens that
            # consecutive spaces would otherwise produce as keys.
            for word in label[2].split():
                # Save stem word
                entry = Stemmer.stem(word)
                # First occurrence gets sort value 0, every repeat the next one.
                tag[entry] = tag.get(entry, -1) + 1
                # save sort word: table, keyword, inx and value
                put('labels', entry, str(tag[entry]), label[0])
        label = labels.getNext()
def searchWords(args):
    """Print the image stored for every label row matching each word.

    Args:
        args: Whitespace-separated keywords in a single string.

    Each keyword is printed, stemmed with ``Stemmer.stem`` and looked up in
    the labels store.  When nothing is found ``Label not found`` is printed;
    otherwise the first field of every row is used as the key into the
    images store and the result is printed.
    """
    from contextlib import closing

    # closing() guarantees both stores are closed even if a lookup raises.
    with closing(KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)) as kv_labels, \
            closing(KeyValue.SqliteKeyValue("sqlite_images.db", "images")) as kv_images:
        for arg in args.split():
            print(arg)
            labels = kv_labels.getAll(Stemmer.stem(arg))
            if not labels:
                print('Label not found')
                continue
            for label in labels:
                print(kv_images.get(label[0]))
# Disabled lookup that read straight from the key-value stores:
#         steam_arg = Stemmer.stem(sys.argv[i])
#         labels = []
#         images = []
#         label_pk = steam_arg
#         label_sort = 0
#         while kv_labels.getSort(label_pk, str(label_sort)):
#             labels.append(kv_labels.getSort(label_pk, str(label_sort)))
#             label_sort += 1
#         for l in labels:
#             images.append(kv_images.get(l))
#         dict_search[steam_arg] = images
#     print(dict_search)
# else:
#     print("Missing arguments..")

# Search using DynamoDB.
# Every command-line argument is a keyword: its stem is passed to d.search()
# and the result is collected in dict_search under that stem.
# Without any argument only a message is printed.
if arguments <= 1:
    print("Missing arguments..")
else:
    for position in range(1, arguments):
        steam_arg = Stemmer.stem(sys.argv[position])
        dict_search[steam_arg] = d.search(steam_arg)
    print(dict_search)

# Close KeyValues Storages
kv_labels.close()
kv_images.close()
if (triple[1] == 'http://xmlns.com/foaf/0.1/depiction'): kv_images.put(triple[0], triple[2]) countImages += 1 #Parse Labels to the Label Collection #Store sortKey per keys terms = {} #Get the first label label = parseTriplesLabels.getNext() while (label): if (kv_images.get(label[0]) and label[1] == 'http://www.w3.org/2000/01/rdf-schema#label'): labels = label[2].split() for l in labels: l = Stemmer.stem(l) if (terms.get(l) is None): terms[l] = 1 else: terms[l] += 1 kv_labels.putSort(l, str(terms[l]), label[0]) #print(label[0]) label = parseTriplesLabels.getNext() #To verify #print(terms) #Utrecht example of a label with two articles # Close KeyValues Storages kv_labels.close()
import sys

import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("./SQLDB/sqlite_labels.db","labels",sortKey=True)
kv_images = KeyValue.SqliteKeyValue("./SQLDB/sqlite_images.db","images")

try:
    # Process Logic: every command-line argument is stemmed, resolved to a
    # category through the labels store and then to an image.
    for a in sys.argv[1:]:
        stemmedWord = Stemmer.stem(a)
        category = kv_labels.get(stemmedWord)
        # A category without a stored image used to crash on the string
        # concatenation below; treat it like a keyword that was not found.
        image = kv_images.get(category) if category is not None else None
        if image is None:
            print("No se encontraron imágenes con " + a)
        else:
            print("Imagen: "+ image + "\n")
finally:
    # Close KeyValues Storages, also when a lookup fails.
    kv_labels.close()
    kv_images.close()
from botocore.exceptions import ClientError # Make connections to KeyValue kv_labels = KeyValue.DynamoDB("labels") kv_images = KeyValue.DynamoDB("images") try: # Process Logic. print("-------------------------------------------") labelsDataset = ParseTriple.ParseTriples('./datasets/labels_en.ttl') for i in range(1, 1000): word = labelsDataset.getNext() category = word[0] label = word[2] steam = Stemmer.stem(label) for x in steam.split(' '): print("category: " + category + " ____________ labe:" + x) kv_labels.put(x, len(x), category) print("-------------------------------------------") print("-------------------------------------------") imagesDataset = ParseTriple.ParseTriples('./datasets/images.ttl') for i in range(0, 2000): img = imagesDataset.getNextImage() category = img[0] imgPath = img[2] print("category: " + category + " _________ imgPath: " + imgPath) kv_images.put(category, len(category), imgPath) except ClientError as e:
# Images: one record per triple read; the sort key is the read index.
for i in range(iteration):
    triple = ParseTripe.ParseTriples.getNext(file)
    images.append({'key': triple[0], 'Skey': i, 'value': triple[2]})
    # Everything after the first 38 characters of the subject identifies it.
    suffix = triple[0][38:]
    if suffix not in EveryLine:
        EveryLine.append(suffix)
file.close()

file = open(filename2, "r", errors='ignore')
# Labels: only subjects collected above are kept, one record per stemmed word.
for i in range(iteration):
    triple = ParseTripe.ParseTriples.getNext(file)
    suffix = triple[0][38:]
    if suffix not in EveryLine:
        # NOTE(review): rebinding the loop variable does not make range()
        # repeat the iteration, so skipped triples still use up a slot.
        i = i - 1
        continue
    labels.extend(
        {'key': Stemmer.stem(token), 'Skey': int(suffix), 'value': triple[0]}
        for token in triple[2].split(' ')
    )
file.close()

DynDBStorage.dynamostorage.StoreInfo(images, labels, 'kv_images', 'kv_labels')
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels",sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

try:
    # Process Logic: read the keywords, then for each one print the matching
    # label rows and the images found for them.
    wordToSearch = input("Ingresa las palabras a buscar")
    for word in wordToSearch.split():
        result = kv_labels.getAll(Stemmer.stem(word))
        print("result", result)
        images = []
        for res in result:
            image = kv_images.get(res[0])
            if image is not None:
                images.append(image)
        print("images of the word:",word,",:",images)
finally:
    # Close KeyValues Storages (the connections were previously left open).
    kv_labels.close()
    kv_images.close()
temp_arr = [temp_images[0], temp_images[2]] images.append(temp_arr) kv_images.put(str(temp_images[0]), str(temp_images[2])) if not temp_images[0] in temp_cat: temp_cat.append(temp_images[0]) temp += 1 temp = 0 while temp < 100: temp_labels = labels_ttl.getNext() if (temp_labels[1] == "http://www.w3.org/2000/01/rdf-schema#label"): len_labels = temp_labels[2].split(" ") sort_value = temp_labels[0].split("/")[4] if (len(len_labels) > 1): for label in len_labels: if temp_labels[0] in temp_cat: stemmed_word = Stemmer.stem(label) temp_arr = [stemmed_word, sort_value, temp_labels[0]] terms.append(temp_arr) kv_labels.putSort(str(stemmed_word), str(sort_value), str(temp_labels[0])) else: temp_arr = [temp_labels[2], sort_value, temp_labels[0]] terms.append(temp_arr) kv_labels.putSort(str(stemmed_word), str(sort_value), str(temp_labels[0])) temp += 1 print("Labels processed") # Close KeyValues Storages kv_labels.close()
import keyvalue.stemmer as Stemmer
import keyvalue.dynamostorage as DynamoStorage
import sys

AWS_REGION = "us-east-1"

# Make connections to Dynamodb
db_images = DynamoStorage.DynamoStorage('images', region=AWS_REGION)
db_labels = DynamoStorage.DynamoStorage('terms', True, AWS_REGION)

# Stem every command-line keyword; nothing is queried when none was given.
search = [Stemmer.stem(keyword) for keyword in sys.argv[1:]]
if search:
    # The stems are handed to the terms table and its result to the images table.
    result = db_labels.getAll(search)
    images = db_images.getAll(result)
    for image in images:
        print(image)
# Make connections to KeyValue
kv_images = KeyValueDynamo.DynamodbKeyValue("images")
kv_labels = KeyValueDynamo.DynamodbKeyValue("labels")

# Process Logic.
parse_images = ParseTriples.ParseTriples('./data/images.ttl')
parse_labels = ParseTriples.ParseTriples('./data/labels_en(1)/labels_en.ttl')

# Insert Images: read up to 10000 triples and store the depiction ones,
# keyed by category with the key length as sort value.
for i in range(10000):
    url = parse_images.getNext()
    if not url:
        # presumably getNext() returns a falsy value at end of file; stop
        # instead of failing on the indexing below -- TODO confirm.
        break
    key = url[0]  # category
    relationship = url[1]
    value = url[2]  # Image URL
    if relationship == 'http://xmlns.com/foaf/0.1/depiction':
        print("images: " + str(i))
        kv_images.put(key, len(key), value)

# Insert Labels: read up to 30000 triples and store one item per stemmed
# word for every label whose category already has an image.
for i in range(30000):
    url = parse_labels.getNext()
    if not url:
        break
    value = url[0]  # category
    relationship = url[1]
    if relationship != 'http://www.w3.org/2000/01/rdf-schema#label':
        continue
    img_exists = kv_images.get(value, len(value))
    if 'Item' not in img_exists:
        continue
    # Stem only labels that are actually stored; split() without an argument
    # avoids the empty keys that consecutive spaces would produce.
    for key in Stemmer.stem(url[2]).split():
        print("labels: " + str(i))
        kv_labels.put(key, len(key), value)
import keyvalue.stemmer as Stemmer
import sys

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("db/sqlite_labels.db","labels",sortKey=True)
kv_images = KeyValue.SqliteKeyValue("db/sqlite_images.db","images")

images = []
try:
    # Process Logic: every command-line keyword is stemmed, resolved to its
    # label rows and those rows to images, which are collected in one list.
    for query in sys.argv[1:]:
        categorys = kv_labels.getAll(Stemmer.stem(query))
        if not categorys:
            # Nothing stored for this keyword; indexing the result here
            # used to fail before the None check was reached.
            continue
        print(categorys[0])
        for category in categorys:
            image = kv_images.get(str(category[0]))
            if image is not None:
                images.append(image)
    print(images)
finally:
    # Close KeyValues Storages, also when a lookup fails.
    kv_labels.close()
    kv_images.close()
def dynamoStorage(num):
    """Store ``num`` depiction images and their labels in DynamoDB.

    Args:
        num: Number of depiction triples to upload; anything accepted by
            ``int()``.

    The ``images`` and ``labels`` tables are created with
    ``createImagesTable`` when ``describe_table`` reports them missing.
    Items are written with ``put(table, key, sort, value)``, where the sort
    value counts, starting at 1, how often the key has been seen.  Only
    labels whose subject was stored as an image are uploaded.
    """
    # GET and/or create the Images and Labels tables
    for table_name in ('images', 'labels'):
        try:
            dynamodbClient.describe_table(TableName=table_name)
        except dynamodbClient.exceptions.ResourceNotFoundException:
            print('Could not found `' + table_name + '` table')
            createImagesTable(table_name)
        print(table_name.capitalize() + ' table load')

    # Read Images and Labels from the file
    parseTriplesImages = ParseTripe.ParseTriples("images.ttl")
    parseTriplesLabels = ParseTripe.ParseTriples("labels_en.ttl")
    print('Images file load')
    print('Labels file load')

    # Filter and upload images; maps category -> number of images stored.
    images = {}
    countImages = 1
    # TODO - verify this validation
    num = int(num)
    while countImages <= num:
        triple = parseTriplesImages.getNext()
        if not triple:
            # presumably getNext() returns a falsy value at end of file (the
            # label loop below depends on that); stop rather than index into
            # it -- TODO confirm.
            break
        if triple[1] == 'http://xmlns.com/foaf/0.1/depiction':
            images[triple[0]] = images.get(triple[0], 0) + 1
            put('images', triple[0], str(images[triple[0]]), triple[2])
            # Only uploaded images count towards num.
            countImages += 1
    print('Images stored in DynamoDB')

    # Filter and upload labels; maps stemmed word -> number of items stored.
    terms = {}
    label = parseTriplesLabels.getNext()
    while label:
        if (label[0] in images
                and label[1] == 'http://www.w3.org/2000/01/rdf-schema#label'):
            for word in label[2].split():
                stem = Stemmer.stem(word)
                terms[stem] = terms.get(stem, 0) + 1
                put('labels', stem, str(terms[stem]), label[0])
        label = parseTriplesLabels.getNext()
    print('Labels stored in DynamoDB')
def searchlabels(key):
    """Return what the labels store holds for ``key``."""
    return kv_labels.get(key)


def searchimg(key):
    """Return what the images store holds for ``key``; ``None`` if ``key`` is falsy."""
    if not key:
        return None
    return kv_images.get(key)


# Show the fixed list of sample keywords, then look each one up.
print(
    "'Academy', 'america','Academic','Angola','Astronomer','Awards','acid','Algeria','Alchemy','Astronaut', 'Autism', "
    "'Android', 'Algorithm' ")
args = [
    'Academy', 'america', 'Academic', 'Angola', 'Astronomer', 'Awards',
    'acid', 'Algeria', 'Alchemy', 'Astronaut', 'Autism', 'Android',
    'Algorithm',
]
for word in args:
    stem = Stemmer.stem(word)
    # stem -> category -> image; only hits are reported.
    img = searchimg(searchlabels(stem))
    if img:
        print(stem + " : " + img)

# Close KeyValues Storages
kv_labels.close()
kv_images.close()
while(img and len(dict_imgs) < n): # Check if the image have not been added and verify the type of relation. if(dict_imgs.get(img[0]) == None and img[1] == "http://xmlns.com/foaf/0.1/depiction"): dict_imgs[img[0]] = img[2] img = parser_images.getNextImage() i += 1 print("LABELS") print("Proccesing...") i = 0 proccesed_imgs = 0 while(label and proccesed_imgs < n): # Check if the current label has a related image in the images dict, if not we don't add the label. if (dict_imgs.get(label[0]) and label[1] == 'http://www.w3.org/2000/01/rdf-schema#label'): proccesed_imgs += 1 steam = Stemmer.stem(label[2]).strip() steam_keys = steam.split(' ') # Create set of steam keys # Example: The real value of the money -> {the, real, value, of, money} Prevent duplicate words in category set_steam_keys = set() for aux in steam_keys: set_steam_keys.add(aux) # We use two dictionaries: dict_labels_sort and dict_labels. # The first one is used to handle duplicate words. For example: # We have two categories, North America and South America, wichi are stemmed # as the follow: {noth, america}, {south, america} # As we notice, we will have the same keyword for two differente categories, so we need # to add a sort key. I handle this with a dictionary: # For the fisrt category dict_labels_sort will be: # { # "america": 0
import keyvalue.stemmer as Stemmer
import keyvalue.dynamostorage as dynamo
from botocore.exceptions import ClientError

kv_labels = dynamo.DynamodbKeyValue('terms')
kv_images = dynamo.DynamodbKeyValue('images')

# Process Logic.
parse_images = ParseTriples.ParseTriples('./data/images.ttl')
parse_labels = ParseTriples.ParseTriples('./data/labels_en.ttl')

# Insert Images: read up to 10000 triples and store the depiction ones,
# using the read index as sort value.
for i in range(10000):
    line = parse_images.getNext()
    if not line:
        # presumably getNext() returns a falsy value at end of file; stop
        # instead of failing on the indexing below -- TODO confirm.
        break
    category = line[0]
    B = line[1]
    imageURL = line[2]
    if B == 'http://xmlns.com/foaf/0.1/depiction':
        kv_images.put(category, i, imageURL)

# Insert Labels: read up to 5000 triples and store one item per stemmed word
# of every label, using the read index as sort value.
for i in range(5000):
    line = parse_labels.getNext()
    if not line:
        break
    category = line[0]
    B = line[1]
    terms = line[2]
    if B == 'http://www.w3.org/2000/01/rdf-schema#label':
        # split() without an argument drops the empty tokens that
        # consecutive spaces would otherwise turn into empty keys.
        for token in terms.split():
            stemmedWord = Stemmer.stem(token)
            kv_labels.put(stemmedWord, i, category)
kv_images.put(image[0], image[2]) #i only increases if the image is not a thumbnail i += 1 labels = ParseTripe.ParseTriples("labels_en.ttl") label = labels.getNext() tag = {} while(label): #Check if the image on the label is inside of the kv_images if kv_images.get(label[0]): #separate labels for each word entries = label[2].split(" ") for entry in entries: #Save stem word entry = Stemmer.stem(entry) #if there is already an entry with the same label then increases the sort value if tag.get(entry) is None: tag[entry] = 0 else: tag[entry] += 1 #save sort word kv_labels.putSort(entry, str(tag[entry]), label[0]) label = labels.getNext() # Close KeyValues Storages kv_labels.close() kv_images.close()
# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

# Process Logic: the first command-line argument is the number of triples to
# read from each dataset; both files are walked in lockstep.
imageLine = ParseTriple.ParseTriples('./dataset/images.ttl')
labelLine = ParseTriple.ParseTriples('./dataset/labels_en.ttl')
repeatedWords = {}  # stemmed word -> next free sort value

triples_to_read = int(sys.argv[1])
for _ in range(triples_to_read):
    imageTriple = imageLine.getNext()
    if imageTriple[1] == 'http://xmlns.com/foaf/0.1/depiction':
        kv_images.put(imageTriple[0], imageTriple[2])

    labelTriple = labelLine.getNext()
    if labelTriple[1] != 'http://www.w3.org/2000/01/rdf-schema#label':
        continue
    for word in Stemmer.stem(labelTriple[2]).split():
        # First occurrence is stored under sort "0", each repeat under the next one.
        sort_index = repeatedWords.get(word, 0)
        kv_labels.putSort(word, str(sort_index), labelTriple[0])
        repeatedWords[word] = sort_index + 1

# Close KeyValues Storages
kv_labels.close()
kv_images.close()
images = {} i += 1 db_images.putAll(images) # Process labels label = label_parser.getNext() i = 0 while (label): if label[1] == LABEL_TYPE: if label[0] in allImages: split = label[2].split() if len(split) > 1: for item in split: if Stemmer.stem(item) in allLabels: allLabels[Stemmer.stem(item)] += 1 else: allLabels[Stemmer.stem(item)] = 0 labels[Stemmer.stem(item)] = [ label[0], allLabels[Stemmer.stem(item)] ] if i % BATCH_LIMIT == 0: db_labels.putAll(labels) labels = {} i += 1 else: if Stemmer.stem(label[2]) in allLabels:
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images", sortKey=True)

# Process Logic.
labelDataset = ParseTriple.ParseTriples('./datasets/labels_en.ttl')
imageDataset = ParseTriple.ParseTriples('./datasets/images.ttl')

# Labels: 999 triples; the whole label is stemmed, then every
# space-separated piece is stored with the loop index as sort value.
for i in range(1, 1000):
    label = labelDataset.getNext()
    category = label[0]
    pieces = Stemmer.stem(label[2]).split(' ')
    for x in pieces:
        print("category: " + category + " ____________ label:" + x)
        kv_labels.putSort(x, i, category)

# Images: 1000 triples from getNextImage(), stored with the loop index as
# sort value.
for i in range(0, 1000):
    images = imageDataset.getNextImage()
    category, imgPath = images[0], images[2]
    print("category: " + category + " _________ imgPath: " + imgPath)
    kv_images.putSort(category, i, imgPath)

# Close KeyValues Storages
kv_labels.close()
kv_images.close()
# Parallel lists: stemerWords[k] is a stemmed word and values[k] the subject
# of the label triple it came from.
stemerWords = []
values = []
parserLables = ParseTripe.ParseTriples("./DataSet/labels_en.ttl")
for i in range(0, 100):
    # Named `triple` so the builtin `tuple` is no longer shadowed.
    triple = parserLables.getNext()
    if triple[1] != 'http://www.w3.org/2000/01/rdf-schema#label':
        continue
    category = triple[0]
    key = triple[2]
    if " " in key:
        # Multi-word label: one entry per word.
        for subword in key.split():
            stemerWords.append(Stemmer.stem(subword))
            values.append(category)
    else:
        stemerWords.append(Stemmer.stem(key))
        values.append(category)

filteredStemerWords = []