def store(num_imag):
    """Load depiction images and stemmed label terms into DynamoDB.

    Reads triples from ``images.ttl`` and ``labels_en.ttl``, makes sure the
    ``images`` and ``labels`` tables exist, and then writes one item per
    depiction triple and one item per stemmed label word through ``put``.

    Args:
        num_imag: Number of depiction triples to store; converted with
            ``int()``, so a numeric string is accepted.
    """
    # Loading images and labels from file
    images = ParseTripe.ParseTriples("images.ttl")
    labels = ParseTripe.ParseTriples("labels_en.ttl")

    # Only the existence of the tables matters here: describe_table raises
    # ResourceNotFoundException for a missing table, which triggers creation.
    try:
        dynamodbClient.describe_table(TableName="images")
    except dynamodbClient.exceptions.ResourceNotFoundException:
        createImagesTable('images')
    try:
        dynamodbClient.describe_table(TableName="labels")
    except dynamodbClient.exceptions.ResourceNotFoundException:
        createImagesTable('labels')

    # Per-subject counter used as the sort value, so that several images of
    # the same subject get distinct keys (1, 2, 3, ...).
    tempImages = {}
    num = int(num_imag)
    while num > 0:
        line = images.getNext()
        if not line:
            # Parser ran out of triples before num_imag images were found.
            break
        if "depiction" in line[1]:
            # The counter must be looked up in tempImages; the previous code
            # queried the describe_table result, so the index never advanced.
            tempImages[line[0]] = tempImages.get(line[0], 0) + 1
            put('images', line[0], str(tempImages[line[0]]), line[2])
            # Only stored depiction triples count towards the limit.
            num -= 1

    # Per-term counter used as the sort value (first occurrence is 0).
    tag = {}
    label = labels.getNext()
    print("creating labels")
    while label:
        if label[1] == "http://www.w3.org/2000/01/rdf-schema#label":
            for entry in label[2].split(" "):
                # Save stem word
                entry = Stemmer.stem(entry)
                # If there is already an entry with the same label then
                # increase the sort value.
                if entry in tag:
                    tag[entry] += 1
                else:
                    tag[entry] = 0
                # Save sort word: table, keyword, index and value
                put('labels', entry, str(tag[entry]), label[0])
        label = labels.getNext()
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer
import sys

# Open the two SQLite-backed stores: labels use a sort key, images do not.
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

# Triple readers for the two dataset files.
imageLine = ParseTriple.ParseTriples('./dataset/images.ttl')
labelLine = ParseTriple.ParseTriples('./dataset/labels_en.ttl')

# Number of times each stemmed word has been stored so far; the current
# count doubles as the sort value of the next entry for that word.
repeatedWords = {}

# The first command-line argument is the number of triples to read from
# each file; both files are consumed in lockstep.
for i in range(int(sys.argv[1])):
    imageTriple = imageLine.getNext()
    if imageTriple[1] == 'http://xmlns.com/foaf/0.1/depiction':
        kv_images.put(imageTriple[0], imageTriple[2])

    labelTriple = labelLine.getNext()
    if labelTriple[1] != 'http://www.w3.org/2000/01/rdf-schema#label':
        continue

    for word in Stemmer.stem(labelTriple[2]).split():
        seen = repeatedWords.get(word, 0)
        kv_labels.putSort(word, str(seen), labelTriple[0])
        repeatedWords[word] = seen + 1
import keyvalue.dynamostorage as DynamoStorage
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer
import sys

# Storage handles for the "labels" and "images" tables.
kv_labels = DynamoStorage.DynamoStorage("labels")
kv_images = DynamoStorage.DynamoStorage("images")

# Triple readers for the two dataset files.
images_reader = ParseTriple.ParseTriples("./dataset/images.ttl")
labels_reader = ParseTriple.ParseTriples("./dataset/labels_en.ttl")

# The first command-line argument is the quantity handed to both bulk loads.
limit = int(sys.argv[1])

# One set object is passed to both calls, first to the image load and then
# to the label load.
shared_values = set()

kv_images.put_many_images(images_reader, limit, shared_values)
kv_labels.put_many_labels(labels_reader, limit, shared_values, Stemmer)
import keyvalue.parsetriples as ParseTriples
import keyvalue.stemmer as Stemmer
import keyvalue.dynamostorage as dynamo
from botocore.exceptions import ClientError

# Predicates that select the triples worth storing.
DEPICTION_PREDICATE = 'http://xmlns.com/foaf/0.1/depiction'
LABEL_PREDICATE = 'http://www.w3.org/2000/01/rdf-schema#label'

# Storage handles: stemmed terms go to 'terms', image links go to 'images'.
kv_labels = dynamo.DynamodbKeyValue('terms')
kv_images = dynamo.DynamodbKeyValue('images')

# Triple readers for the two dataset files.
parse_images = ParseTriples.ParseTriples('./data/images.ttl')
parse_labels = ParseTriples.ParseTriples('./data/labels_en.ttl')

# Images: read 10000 triples and store the depiction ones, using the
# position of the triple in the file as the second argument to put().
for position in range(10000):
    triple = parse_images.getNext()
    subject, predicate, image_url = triple[0], triple[1], triple[2]
    if predicate != DEPICTION_PREDICATE:
        continue
    kv_images.put(subject, position, image_url)

# Labels: read 5000 triples; every space-separated token of a label is
# stemmed and stored against the subject of the triple.
for position in range(5000):
    triple = parse_labels.getNext()
    subject, predicate, text = triple[0], triple[1], triple[2]
    if predicate != LABEL_PREDICATE:
        continue
    for token in text.split(' '):
        kv_labels.put(Stemmer.stem(token), position, subject)
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

# Process Algorithm.
# Read Images and Labels from the file
parseTriplesImages = ParseTripe.ParseTriples("images.ttl")
parseTriplesLabels = ParseTripe.ParseTriples("labels_en.ttl")

# Parse Images to the Image Collection
# Only the first 1000 images. The counter starts at 0, so the bound has to be
# strict: the previous `<= 1000` condition stored 1001 images.
countImages = 0
while countImages < 1000:
    triple = parseTriplesImages.getNext()
    if triple[1] == 'http://xmlns.com/foaf/0.1/depiction':
        kv_images.put(triple[0], triple[2])
        # Only stored depiction triples count towards the limit.
        countImages += 1

# Parse Labels to the Label Collection
# Store sortKey per keys
terms = {}

# Get the first label
label = parseTriplesLabels.getNext()
# Loader script: stems label text into the "labels" store and then starts
# reading image triples.
# NOTE(review): this fragment is cut off inside the `try:` block; the matching
# `except`/`finally` and the rest of the image loop are not visible here.
import keyvalue.dynamostorage as KeyValue
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer
from botocore.exceptions import ClientError

# Make connections to KeyValue
kv_labels = KeyValue.DynamoDB("labels")
kv_images = KeyValue.DynamoDB("images")

try:
    # Process Logic.
    print("-------------------------------------------")
    labelsDataset = ParseTriple.ParseTriples('./datasets/labels_en.ttl')
    # range(1, 1000) yields 999 iterations, so 999 label triples are read.
    for i in range(1, 1000):
        word = labelsDataset.getNext()
        category = word[0]
        label = word[2]
        # The whole label text is stemmed first and split on spaces afterwards.
        steam = Stemmer.stem(label)
        for x in steam.split(' '):
            print("category: " + category + " ____________ labe:" + x)
            # The length of the stemmed word is passed as the second argument.
            # NOTE(review): if that argument is a sort key, words of equal
            # length stored under the same key may collide - confirm.
            kv_labels.put(x, len(x), category)
    print("-------------------------------------------")
    print("-------------------------------------------")
    imagesDataset = ParseTriple.ParseTriples('./datasets/images.ttl')
    for i in range(0, 2000):
        # Uses getNextImage() here, unlike getNext() in the label loop.
        img = imagesDataset.getNextImage()
        category = img[0]
        imgPath = img[2]
# Loader script setup: constants, triple parsers, DynamoDB storage handles and
# the in-memory dictionaries used by the processing loops.
# NOTE(review): this fragment is cut off right after the first statement of
# the image loop; the rest of the script is not visible here.
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer
import keyvalue.dynamostorage as DynamoStorage

# NOTE(review): BATCH_LIMIT is not used in the visible part - presumably a
# write batch size; confirm against the rest of the script.
BATCH_LIMIT = 24
# Number of triples read from the images file by the loop below.
IMAGE_LIMIT = 2000
IMAGES_FILE = "images.ttl"
LABELS_FILE = "labels_en.ttl"
AWS_REGION = "us-east-1"
# Predicate URIs; not referenced in the visible part of the script.
IMAGE_TYPE = "http://xmlns.com/foaf/0.1/depiction"
LABEL_TYPE = "http://www.w3.org/2000/01/rdf-schema#label"

# Init parsers
image_parser = ParseTripe.ParseTriples(IMAGES_FILE)
label_parser = ParseTripe.ParseTriples(LABELS_FILE)

# Make connections to Dynamodb
db_images = DynamoStorage.DynamoStorage('images', region=AWS_REGION)
# NOTE(review): the meaning of the positional `True` is not visible here -
# presumably it enables a sort key for the 'terms' table; confirm.
db_labels = DynamoStorage.DynamoStorage('terms', True, AWS_REGION)

# Internal storage
allImages = {}
allLabels = {}
images = {}
labels = {}

# Process images
i = 0
for x in range(IMAGE_LIMIT):
    image = image_parser.getNext()
# Loader script: stores depiction images in the SQLite "images" store and then
# walks the label triples.
# NOTE(review): this fragment is cut off at the first `if` of the label loop;
# the body of that loop is not visible here.
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db","labels",sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db","images")

# Process Logic.
# Reading ttl files
images = ParseTripe.ParseTriples("images.ttl")

# Limit on the number of images to load.
# NOTE(review): the original comment said 100 images, but `i` runs from 0
# through 400, so the loop stores 401 depiction images - confirm the intent.
i = 0
while (i <= 400):
    image = images.getNext()
    # Keep only depiction triples (the original comment describes the
    # skipped ones as thumbnails).
    if "depiction" in image[1]:
        kv_images.put(image[0], image[2])
        # i only increases when an image was actually stored
        i += 1

labels = ParseTripe.ParseTriples("labels_en.ttl")
label = labels.getNext()
tag = {}
# The loop ends when getNext() returns a falsy value.
while(label):
    # Check if the image on the label is inside of the kv_images
    if kv_images.get(label[0]):
# Loader script: stores depiction images in the SQLite "images" store and then
# starts on the label triples.
# NOTE(review): `KeyValue` is used below but not imported in the visible part
# of the script, and the fragment is cut off at the `if` of the label loop.
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("db/sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("db/sqlite_images.db", "images")

# Process Logic.
images = []    # [subject, image link] pairs that were stored
terms = []
temp_cat = []  # distinct subjects seen among the stored images
temp = 0       # loop counter, reused for both loops

images_ttl = ParseTripe.ParseTriples("resources/images.ttl")
labels_ttl = ParseTripe.ParseTriples("resources/labels_en.ttl")

while temp < 100:
    temp_images = images_ttl.getNext()
    if (temp_images[1] == "http://xmlns.com/foaf/0.1/depiction"):
        temp_arr = [temp_images[0], temp_images[2]]
        images.append(temp_arr)
        kv_images.put(str(temp_images[0]), str(temp_images[2]))
        # Linear membership test on a list; each subject is recorded once.
        if not temp_images[0] in temp_cat:
            temp_cat.append(temp_images[0])
        # NOTE(review): the original indentation is lost; the counter is
        # assumed to advance once per stored depiction triple - confirm.
        temp += 1

# Restart the counter for the label loop.
temp = 0
while temp < 100:
    temp_labels = labels_ttl.getNext()
    if (temp_labels[1] == "http://www.w3.org/2000/01/rdf-schema#label"):
# Loader script: stores depiction images in the 'images' store and then
# processes labels whose subject has a stored image.
# NOTE(review): this fragment is cut off at the `if` of the label loop; the
# body of that branch is not visible here.
import keyvalue.parsetriples as ParseTriples
import keyvalue.stemmer as Stemmer
import keyvalue.dynamostorage as Dynamo
from botocore.exceptions import ClientError

kv_labels = Dynamo.DynamodbKeyValue('labels')
kv_images = Dynamo.DynamodbKeyValue('images')

# Process Logic.
parse_images = ParseTriples.ParseTriples('../Dataset/images.ttl')
parse_labels = ParseTriples.ParseTriples('../Dataset/labels_en.ttl')

# Insert Images
for i in range(2000):
    line = parse_images.getNext()
    category = line[0]
    b = line[1]
    imageURL = line[2]
    if b == 'http://xmlns.com/foaf/0.1/depiction':
        # The length of the subject string is passed as the second argument.
        kv_images.put(category, len(category), imageURL)
    else:
        # NOTE(review): rebinding the loop variable does not affect a
        # `for ... in range(...)` loop - the next iteration overwrites `i`, so
        # this does not make up for skipped triples; the loop always reads
        # exactly 2000 triples.
        i = i - 1

# Labels
for i in range(5000):
    line = parse_labels.getNext()
    category = line[0]
    b = line[1]
    terms = line[2]
    # Look the subject up with the same (subject, length) pair used when
    # storing images above.
    findImage = kv_images.get(category, len(category))
    if b == 'http://www.w3.org/2000/01/rdf-schema#label' and findImage is not None:
def dynamoStorage(num):
    """Load depiction images and the labels of those images into DynamoDB.

    Makes sure the ``images`` and ``labels`` tables exist, stores up to
    ``num`` depiction triples from ``images.ttl`` and then stores the stemmed
    words of every label in ``labels_en.ttl`` whose subject has a stored
    image. All writes go through ``put``.

    Args:
        num: Number of depiction triples to store; converted with ``int()``,
            so a numeric string is accepted.
    """
    # GET and/or create Images Table. Only existence matters: describe_table
    # raises ResourceNotFoundException for a missing table.
    try:
        dynamodbClient.describe_table(TableName="images")
    except dynamodbClient.exceptions.ResourceNotFoundException:
        print('Could not found `images` table')
        createImagesTable('images')
    print('Images table load')

    # GET and/or create Labels
    try:
        dynamodbClient.describe_table(TableName="labels")
    except dynamodbClient.exceptions.ResourceNotFoundException:
        print('Could not found `labels` table')
        createImagesTable('labels')
    print('Labels table load')

    # Read Images and Labels from the file
    parseTriplesImages = ParseTripe.ParseTriples("images.ttl")
    parseTriplesLabels = ParseTripe.ParseTriples("labels_en.ttl")
    print('Images file load')
    print('Labels file load')

    # Per-subject counter used as the sort value, so that several images of
    # the same subject get distinct keys (1, 2, 3, ...).
    images = {}

    # Filter and upload images. The counter starts at 1 and the bound is
    # inclusive, so exactly `num` depiction triples are stored.
    countImages = 1
    num = int(num)
    while countImages <= num:
        triple = parseTriplesImages.getNext()
        if not triple:
            # Parser ran out of triples before `num` images were found; the
            # label loop below relies on the same falsy end-of-data signal.
            break
        if triple[1] == 'http://xmlns.com/foaf/0.1/depiction':
            images[triple[0]] = images.get(triple[0], 0) + 1
            put('images', triple[0], str(images[triple[0]]), triple[2])
            countImages += 1
    print('Images stored in DynamoDB')

    # Per-term counter used as the sort value (first occurrence is 1).
    terms = {}

    # Filter and upload labels: only labels of subjects with a stored image.
    label = parseTriplesLabels.getNext()
    while label:
        if (label[0] in images
                and label[1] == 'http://www.w3.org/2000/01/rdf-schema#label'):
            for word in label[2].split():
                term = Stemmer.stem(word)
                terms[term] = terms.get(term, 0) + 1
                put('labels', term, str(terms[term]), label[0])
        label = parseTriplesLabels.getNext()
    print('Labels stored in DynamoDB')
# Loader script: stores depiction images in the SQLite "images" store and then
# starts reading label triples.
# NOTE(review): this fragment is cut off after the first statement of the
# label loop; the rest of the script is not visible here.
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTriple
import keyvalue.stemmer as Stemmer

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

# Process Logic.
parserImages = ParseTriple.ParseTriples("images.ttl")
parserLabels = ParseTriple.ParseTriples("labels_en.ttl")

############### Populating sqlite_images.db ###############
images = []  # (subject, image link) tuples that were stored
contadorImagenes = 0  # number of images stored so far
while contadorImagenes < 500:
    tupla = parserImages.getNext()
    if tupla != None and tupla[1] == "http://xmlns.com/foaf/0.1/depiction":
        # Drop <B> from the <A><B><C> triple:
        tupla = tupla[:1] + tupla[2:]
        images.append(tupla)
        #print("Inserted in images:",tupla[0], "->", tupla[1])
        kv_images.put(tupla[0], tupla[1])
        # NOTE(review): the original indentation is lost; the counter is
        # assumed to advance once per stored image. If so, a parser that keeps
        # returning None at end of data would make this loop spin forever -
        # confirm.
        contadorImagenes += 1

############### Populating sqlite_labels.db ###############
#print("NOW FROM LABELS ******************************************")
etiquetas = []
stemmedWords = []
stemmedWordsIndexes = []
contadorLabels = 0
while contadorLabels < 5000:
    tupla = parserLabels.getNext()
import keyvalue.sqlitekeyvalue as KeyValue
import keyvalue.parsetriples as ParseTripe
import keyvalue.stemmer as Stemmer
from dynamoDB.dynamodb import Dynamodb

# Make connections to KeyValue
kv_labels = KeyValue.SqliteKeyValue("sqlite_labels.db", "labels", sortKey=True)
kv_images = KeyValue.SqliteKeyValue("sqlite_images.db", "images")

# NOTE(review): the configuration path is absolute and machine-specific.
dynamoDB = Dynamodb('C:\\Users\\joaqu\\Desktop\\Cloud1\\dynamoDB\\config.json')
print("Se han creado las tablas")

# Process Logic.
images = []  # (subject, image link) tuples collected from the dataset
parserImg = ParseTripe.ParseTriples("./DataSet/images.ttl")
imagesToLoad = 100  # number of triples to read from the images file

# The loop variable used to be called `tuple`, which shadowed the builtin for
# the rest of the module; it is named `triple` here.
for i in range(imagesToLoad):
    triple = parserImg.getNext()
    if triple[1] == "http://xmlns.com/foaf/0.1/depiction":
        # Drop the predicate so the tuple maps the subject to its image link.
        triple = triple[:1] + triple[2:]
        images.append(triple)
        # Writing to kv_images was disabled in the original script; the tuples
        # are only collected in memory.

print("images", images)
print("images len:", len(images))