# Train a Word2Vec model on the token sequences exposed by the atire index
# and save it to disk as 'wsj-collection-vectors'.
import gensim
import atire

atire.init("atire")

# Upper bound (exclusive) of the indices passed to atire.get_ordered_tokens;
# presumably the number of documents in the collection -- confirm against
# the index being loaded.
COLLECTION_SIZE = 173251

# One entry per index, in index order.  Named `sentences` (not `input`) so the
# builtin input() is not shadowed.
sentences = [
    atire.get_ordered_tokens(doc_index) for doc_index in range(COLLECTION_SIZE)
]

# NOTE(review): `size` and `iter` are the keyword names used by gensim < 4.0;
# gensim 4.x renamed them to `vector_size` and `epochs`.  Left unchanged here
# because the installed gensim version is not visible from this file.
model = gensim.models.Word2Vec(
    sentences=sentences,
    size=300,
    window=5,
    workers=4,
    min_count=1,
    iter=30,
)
model.save('wsj-collection-vectors')

# TODO: Re-train Word2Vec model, it's not getting numbers in at the moment.
# Do it Friday night. Train for 30 iterations

# Print the tokens of one fixed index as a spot check.
print(atire.get_ordered_tokens(63615))
print("done")

# Reload the model that was just saved.
loaded_models = gensim.models.Word2Vec.load("wsj-collection-vectors")
#
# print (loaded_models.most_similar('money'))
# Path of the saved word-embedding model.
WORD_EMBEDDING_PATH = "wsj-collection-vectors"
# Metric flag appended to the atire command line below.
METRIC = " -mMAP@40"

# Linear annealing schedule for `eps`: it starts at start_eps, and stepDrop is
# the per-step decrement that reaches end_eps after annealing_steps steps.
# NOTE(review): presumably an exploration rate -- confirm where eps is used.
annealing_steps = 10000.
start_eps = 1.0
end_eps = 0.1
eps = start_eps
stepDrop = (start_eps - end_eps) / annealing_steps

# All-zero vector of length WORD_VECTOR_DIMENSIONS.
PADDING = np.zeros(WORD_VECTOR_DIMENSIONS)

# Fixed seed for the `random` module.
random.seed(500)

# HYPERPARAMETERS:
atire.init("atire -a " + ASSESSMENT_FILE + METRIC)


def write_to_file(filename, information):
    """
    Append one line summarising ``information`` to ``filename``.

    Each row contributes ``str(row[0])`` immediately followed by
    ``str(row[1])`` with leading/trailing square brackets stripped, then a
    single space.  The line is terminated with a newline.

    :param filename: path of the file to append to (created if missing)
    :param information: a 2d list of training information; every row needs
        at least two elements
    :return: None
    """
    with open(filename, "a") as f:
        entries = [
            str(row[0]) + str(row[1]).strip('[]') + " " for row in information
        ]
        message = "".join(entries) + "\n"
        # The visible original built `message` but never wrote it, so the
        # file was opened and closed without receiving any data.
        f.write(message)
import atire

# Command string handed to atire.init; the trailing space after "-a" is part
# of the original argument and is kept as-is.
_ATIRE_COMMAND = "atire -a "

# atire.init is invoked twice with the same argument, exactly as before.
# NOTE(review): the second call looks redundant -- confirm whether atire
# needs re-initialisation before removing it.
atire.init(_ATIRE_COMMAND)
atire.init(_ATIRE_COMMAND)