vecFile = sys.argv[4] pathToSVMFile = sys.argv[5] pathToExpansionCache = sys.argv[6] pathToOutput = sys.argv[7] # open the rel rel = shelve.open(relFile) # open the vectors print "Loading vectors" vecs = load_vectors(vecFile) # read clusters and get their cluster centers by taking the average... print "Reading agglomerative cluster centers" clusterCenters = [ getAverageWordRep(x, vecs) for x in read_sets(clusterFile) ] # IT MIGHT HAPPEN THAT SOME CLUSTER CENTERS ARE ()? HOW IS THIS POSSIBLE? # set some remaining parameters expansion = 5 window = 5 svmFileInfo = '_SVM_' + clusterFile.split( '/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window) expansionCacheInfo = "_expansionParam_" + str(expansion) wordsOfInterest = [x.split("_")[0] for x in os.listdir(pathToSVMFile)] #print wordsOfInterest f = open(pathToOutput, 'r') StartIndex = len(f.readline().split(" ")) - 10 print "Start index: ", StartIndex
pathToNormalVectors = sys.argv[7] expansion = 5 window = 5 svmFileInfo = '_SVM_' + clusterFile.split('/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window) expansionCacheInfo = "_expansionParam_" + str(expansion) print "Loading rel, task, vector, words that have been disambiguated" rel = shelve.open(relFile) task, tralala = load_task(taskFilename) vectors = load_vectors(vectorsFilename) normalVectors = load_vectors(pathToNormalVectors) disambiguatedWords = [x.split("_")[0] for x in os.listdir(pathToSVMFile)] print "Reading agglomerative cluster centers" clusterCenters = [getAverageWordRep(x, vectors) for x in read_sets(clusterFile)] print "Starting..." # initiate empty ratings methodsRating = [] humanRating = [] questions = task.values() jointVocCache = dict() partVoc = set(vectors.keys()) print len(disambiguatedWords), "disambiguated words" done = 0 for i in xrange(len(questions)): question = questions[i]
normalVectorsFile = sys.argv[7] expansion = 5 window = 5 svmFileInfo = '_SVM_' + clusterFile.split('/')[-1] + "_expansionParam" + str(expansion) + "_window" + str(window) expansionCacheInfo = "_expansionParam_" + str(expansion) print "Loading rel, task, vector, words that have been disambiguated" rel = shelve.open(relFile) task, tralala = load_task(taskFilename) vectors = load_vectors(vectorsFilename) normalVectors = load_vectors(normalVectorsFile) disambiguatedWords = [x.split("_")[0] for x in os.listdir(pathToSVMFile)] print "Reading agglomerative cluster centers" clusterCenters = [getAverageWordRep(x, vectors) for x in read_sets(clusterFile)] print "Starting..." # initiate empty ratings methodsRating = [] humanRating = [] questions = task.values() jointVocCache = dict() partVoc = set(vectors.keys()) print len(disambiguatedWords), "disambiguated words" done = 0 for i in xrange(len(questions)): question = questions[i]
pathToSVMFile = sys.argv[5] pathToExpansionCache = sys.argv[6] pathToTask = sys.argv[7] alreadyDisambiguatedWords = set([x.split("_")[0] for x in os.listdir(pathToSVMFile)]) # open the rel rel = shelve.open(relFile) # open the vectors print "Loading vectors" vecs = load_vectors(vecFile) # read clusters and get their cluster centers by taking the average... print "Reading agglomerative cluster centers" agglomerativeClusterCenters = [getAverageWordRep(x, vecs) for x in read_sets(clusterFile)] # set some parameters expansion = 5 window = 5 # get the words that occur in the task and need to be compared _, wordsToSplit = load_task(pathToTask) indexCache = dict() wordsToSplit = filter(lambda x: x not in alreadyDisambiguatedWords, wordsToSplit) total = len(wordsToSplit) for i, word in enumerate(wordsToSplit): # progess