def createTrainMatrices(voc):
    '''
    Build row-aligned image and sentence matrices from the flickr30k pairs.

    Consecutive pairs that share an image filename are merged into a single
    token list, which is vectorised with getFullSentence and stored together
    with the feature vector of that same image. Documents whose vector has
    zero norm are skipped.

    :param voc: vocabulary; its length fixes the size of the weight vector and
        it is forwarded to getFullSentence
    :return: tuple (trainingimages, trainingsentences) of numpy arrays in which
        row k of both arrays belongs to the same image
    '''
    s = getStopwords()
    # Per vocabulary index: number of stored documents with a positive entry.
    # NOTE(review): the name says "idf" but these are raw document counts and
    # the sentence vectors are multiplied by them (not by an inverse) --
    # confirm this weighting is intended.
    idf = np.zeros(len(voc))
    trainingimages = []
    trainingsentences = []

    def store_document(tokens, feat):
        # Vectorise the merged tokens of one image; keep only non-empty vectors.
        sentence = getFullSentence(tokens, voc, s)
        if np.linalg.norm(sentence) > 0:
            idf[np.asarray(sentence) > 0] += 1
            trainingimages.append(feat)
            trainingsentences.append(sentence)

    dp = getDataProvider('flickr30k')
    current_image = None  # None means "no image seen yet"
    current_feat = None
    current_sentence = []
    for currentPair, pair in enumerate(dp.iterImageSentencePair()):
        if currentPair % 1000 == 0:
            print("Current pair : " + str(currentPair))
        img_name = pair['image']['filename']
        new_sentence = pair['sentence']['tokens']
        # Compare filenames by value; identity (`is`) is not reliable for str.
        if img_name == current_image:
            current_sentence = current_sentence + new_sentence
            continue
        # A new image starts: flush the finished document with the features
        # of the image it was collected for, not those of the new image.
        if current_image is not None:
            store_document(current_sentence, current_feat)
        current_image = img_name
        current_feat = pair['image']['feat']
        current_sentence = new_sentence
    # The last image has no successor to trigger its flush.
    if current_image is not None:
        store_document(current_sentence, current_feat)

    trainingsentences = np.array(trainingsentences)
    trainingimages = np.array(trainingimages)
    # Guard the empty case: an empty array cannot be broadcast against idf.
    if len(trainingsentences):
        trainingsentences = trainingsentences * idf
    return trainingimages, trainingsentences
def mainExec(name_file, features):
    '''
    Learn a CCA model and a Stacked Auxiliary Embedding CCA model and save
    both to disk.

    First a 128-component CCA is fitted on the tf-idf weighted snippet vectors
    and their image features and pickled to "ccasnippetmodel.p". Then the
    flickr30k pairs are projected with that model, every image and sentence
    vector is extended with the output of phi, and a 96-component CCA fitted
    on the extended vectors is pickled to "augmentedcca.p".

    :param name_file: forwarded unchanged to getImage
    :param features: forwarded unchanged to getImage
    :return: None
    '''
    print("Creating vocabulary")
    voc = readVocabulary()
    print("Generating document vectors")
    occurrenceVectors, idf = createOccurrenceVectors(voc)
    print("Weighing vectors")
    weightedVectors = weight_tfidf(occurrenceVectors, idf)

    print("Creating matrices")
    # Collect rows in lists and convert once; growing an array with
    # np.concatenate per row copies the whole matrix every iteration.
    sentence_rows = []
    image_rows = []
    for key in weightedVectors:
        if not isLargeEnough(key):
            continue
        print("current Sentence: " + str(len(sentence_rows) + 1))
        sentence_rows.append([float(value) for value in weightedVectors[key]])
        image_rows.append(getImage(key, name_file, features))
    # Always 2-D (one row per snippet), also when only one snippet qualifies.
    sentenceMatrix = np.array(sentence_rows)
    imagematrix = np.array(image_rows)

    print("Modelling cca")
    cca = CCA(n_components=128)
    cca.fit(sentenceMatrix, imagematrix)
    # Pickle data is binary; the context manager also closes the file.
    with open("ccasnippetmodel.p", 'wb') as model_file:
        pickle.dump(cca, model_file)

    # Per vocabulary index: number of sentences with a positive entry.
    doc_freq = np.zeros(len(voc))
    trainingimages = []
    trainingsentences = []
    dp = getDataProvider('flickr30k')
    # NOTE(review): createTrainMatrices iterates with iterImageSentencePair()
    # and calls getFullSentence(tokens, voc, stopwords); confirm that
    # sampleImageSentencePair() is iterable and that getFullSentence accepts
    # a single pair argument.
    for currentPair, pair in enumerate(dp.sampleImageSentencePair(), 1):
        if currentPair % 100 == 0:
            print("Current pair: " + str(currentPair))
        trainingimages.append(pair['image']['feat'])
        sentence = getFullSentence(pair)
        doc_freq[np.asarray(sentence) > 0] += 1
        trainingsentences.append(sentence)
    trainingsentences = [np.asarray(sentence) * doc_freq
                         for sentence in trainingsentences]

    # The model was fitted as fit(sentences, images), so transform must get
    # its arguments in the same order and returns the scores in that order.
    # (Assumes CCA follows the scikit-learn fit/transform interface.)
    trans_sent, trans_img = cca.transform(trainingsentences, trainingimages)
    nn_img = nearest_neighbor(trainingimages)
    nn_sent = nearest_neighbor(trainingsentences)

    # list.extend returns None (and arrays have no extend), so build each
    # augmented vector explicitly. Assumes phi returns a 1-D sequence.
    augmented_imgs = [
        np.concatenate((img, phi(3000, nn_img, projected)))
        for img, projected in zip(trainingimages, trans_img)
    ]
    augmented_sentences = [
        np.concatenate((sentence, phi(3000, nn_sent, projected)))
        for sentence, projected in zip(trainingsentences, trans_sent)
    ]

    augmentedcca = CCA(n_components=96)
    augmentedcca.fit(augmented_sentences, augmented_imgs)
    # Save the augmented model itself, not the first-stage model.
    with open("augmentedcca.p", 'wb') as model_file:
        pickle.dump(augmentedcca, model_file)