Beispiel #1
0
import gensim
import atire



# Initialise the ATIRE bindings before any document is read from them.
atire.init("atire")


# Number of document ids to read: 0 .. COLLECTION_SIZE - 1.
COLLECTION_SIZE = 173251


# One entry per document id, each whatever atire.get_ordered_tokens returns
# (presumably the document's tokens in order — confirm against the atire
# bindings). Named `sentences` rather than `input` so the builtin `input()`
# is not shadowed for the rest of the module.
sentences = [
    atire.get_ordered_tokens(doc_id) for doc_id in range(COLLECTION_SIZE)
]

# min_count=1 keeps every token in the vocabulary, including ones seen once.
# NOTE(review): `size=` / `iter=` are the gensim 3.x keyword names; gensim 4
# renamed them to `vector_size=` / `epochs=`.
model = gensim.models.Word2Vec(
    sentences=sentences,
    size=300,
    window=5,
    workers=4,
    min_count=1,
    iter=30,
)
model.save('wsj-collection-vectors')

# TODO: Re-train the Word2Vec model; it's not getting numbers in at the
# moment. Do it Friday night. Train for 30 iterations.

# Print the tokens of a single document (id 63615) for manual inspection.
print(atire.get_ordered_tokens(63615))


print("done")


# Load the model back from the path it was saved under.
loaded_models = gensim.models.Word2Vec.load("wsj-collection-vectors")

# Query through the KeyedVectors held in `.wv`: calling most_similar directly
# on the Word2Vec object is deprecated since gensim 1.0 and removed in 4.0,
# while `.wv.most_similar` works on both.
print(loaded_models.wv.most_similar('money'))
Beispiel #2
0
# Path of the word-embedding file, and the metric flag that is appended to
# the ATIRE init string below.
WORD_EMBEDDING_PATH = "wsj-collection-vectors"
METRIC = " -mMAP@40"

# Epsilon schedule: eps starts at start_eps, and stepDrop is the
# (start_eps - end_eps) span split evenly over annealing_steps.
# NOTE(review): presumably eps is lowered by stepDrop once per step by code
# outside this section — confirm against the training loop.
start_eps, end_eps = 1.0, 0.1
annealing_steps = 10000.0
eps = start_eps
stepDrop = (start_eps - end_eps) / annealing_steps

# All-zero vector with WORD_VECTOR_DIMENSIONS entries.
# NOTE(review): presumably stands in for missing words when padding — confirm.
PADDING = np.zeros(WORD_VECTOR_DIMENSIONS)

# Fixed seed for the `random` module.
random.seed(500)

# Start ATIRE, passing ASSESSMENT_FILE after "-a" followed by the metric flag.
atire.init("atire -a " + ASSESSMENT_FILE + METRIC)


def write_to_file(filename, information):
    """
    Append one line summarising ``information`` to ``filename``.

    Each row contributes ``str(row[0])`` immediately followed by
    ``str(row[1])`` with any leading/trailing ``[`` and ``]`` characters
    stripped, then a single space. The line is terminated with a newline,
    so an empty ``information`` appends a bare newline.

    :param filename: path of the file to append to (created if missing)
    :param information: a 2d list of training information; only the first
        two items of each row are used
    :return: None
    """
    message = "".join(
        str(row[0]) + str(row[1]).strip('[]') + " " for row in information
    )
    message += "\n"

    # Persist the line: without this write the file would only be
    # created/touched and the formatted message discarded.
    with open(filename, "a") as f:
        f.write(message)
import atire

# NOTE(review): "-a" is passed here with nothing after it, whereas the earlier
# init call builds "atire -a " + ASSESSMENT_FILE + METRIC — confirm that an
# empty value is intended.
# NOTE(review): the call is issued twice with identical arguments; this looks
# like accidental duplication, but confirm whether atire.init may safely be
# called only once before removing either line.
atire.init("atire -a " )
atire.init("atire -a " )