def apply_model(in_file, out_dir, model, trained_model, arg_space_files,
                alpha, beta, lambda_, out_format):
    """Compose the phrases listed in ``in_file`` and store the composed space.

    Args:
        in_file: path of the tuple list to compose; fields 0, 1 and 2 of
            every entry are read.
        out_dir: directory prefix used for the output file names.
        model: model name handed to ``create_model``. When it is None the
            model is loaded from ``trained_model`` instead.
        trained_model: path of a saved ``CompositionModel``; only read when
            ``model`` is None.
        arg_space_files: sequence of argument-space paths. The first one is
            always loaded; a second one is loaded only when the sequence
            holds exactly two entries.
        alpha, beta, lambda_: forwarded unchanged to ``create_model``.
        out_format: when not None, the composed space is additionally
            exported in this format.

    The composed space is saved as
    ``<out_dir>/COMPOSED_SS.<model class name>.<input file name>.pkl``.
    """
    # print(...) with a single argument emits the same text whether print is
    # a statement or a function.
    print("Reading in data...")
    input_name = in_file.split("/")[-1]

    if model is None:
        composer = io_utils.load(trained_model, CompositionModel)
    else:
        composer = create_model(model, alpha, beta, lambda_)
    composer_name = type(composer).__name__

    first_space = io_utils.load(arg_space_files[0], Space)
    second_space = None
    if len(arg_space_files) == 2:
        second_space = io_utils.load(arg_space_files[1], Space)

    phrases = io_utils.read_tuple_list(in_file, fields=[0, 1, 2])

    print("Applying composition model:%s" % composer_name)
    # A LexicalFunction model is always given the first space only, even
    # when a second argument space was loaded.
    use_pair = (second_space is not None
                and type(composer) is not LexicalFunction)
    source = (first_space, second_space) if use_pair else first_space
    composed_space = composer.compose(phrases, source)

    print("Printing...")
    out_file = out_dir + "/COMPOSED_SS" + "." + composer_name + "." + input_name
    io_utils.save(composed_space, out_file + ".pkl")

    if out_format is not None:
        composed_space.export(out_file, format=out_format)
Beispiel #2
0
def train_model(in_file, out_dir, model, arg_space_files, phrase_space_file,
                regression, crossvalid, intercept, param, param_range,
                export_params):
    """Train a composition model and save it under ``out_dir``.

    Args:
        in_file: path of the training tuple list; fields 0, 1 and 2 of every
            entry are read.
        out_dir: directory prefix used for the output file names.
        model: one of "weighted_add", "full_add", "lexical_func", "dilation".
        arg_space_files: sequence of argument-space paths. The first one is
            always loaded; a second one only when there are exactly two.
        phrase_space_file: path of the phrase space used as training target.
        regression: "ridge" or "lstsq" selects the learner for models other
            than WeightedAdditive and Dilation; any other value makes the
            model use its default constructor.
        crossvalid, param, param_range: passed to the ridge learner only.
        intercept: passed to either learner.
        export_params: when truthy, the trained model is also exported to
            ``<out_file>.params``.

    Raises:
        ValueError: if ``model`` is not one of the supported names. The
            spaces have already been loaded by the time this is raised.
    """
    # print(...) with a single argument emits the same text whether print is
    # a statement or a function.
    print("Reading in data...")
    input_name = in_file.split("/")[-1]

    model_dict = {
        "weighted_add": WeightedAdditive,
        "full_add": FullAdditive,
        "lexical_func": LexicalFunction,
        "dilation": Dilation,
    }
    learner_dict = {
        "ridge": RidgeRegressionLearner,
        "lstsq": LstsqRegressionLearner,
    }

    first_space = io_utils.load(arg_space_files[0], Space)
    second_space = None
    if len(arg_space_files) == 2:
        second_space = io_utils.load(arg_space_files[1], Space)

    phrase_space = io_utils.load(phrase_space_file, Space)

    if model not in model_dict:
        raise ValueError("Invalid model:%s for training" % model)
    model_cls = model_dict[model]

    # WeightedAdditive and Dilation never receive a learner; the remaining
    # models receive one only for the two known regression names.
    if (model_cls in (WeightedAdditive, Dilation)
            or regression not in ("ridge", "lstsq")):
        trainee = model_cls()
    else:
        if regression == "ridge":
            learner_args = {
                "crossvalidation": crossvalid,
                "intercept": intercept,
                "param": param,
                "param_range": param_range,
            }
        else:
            learner_args = {"intercept": intercept}
        trainee = model_cls(learner=learner_dict[regression](**learner_args))

    samples = io_utils.read_tuple_list(in_file, fields=[0, 1, 2])

    print("Training %s model" % model)
    # The lexical function model is always trained on the first space only.
    if second_space is not None and model != "lexical_func":
        trainee.train(samples, (first_space, second_space), phrase_space)
    else:
        trainee.train(samples, first_space, phrase_space)

    print("Printing...")
    out_file = out_dir + "/TRAINED_COMP_MODEL" + "." + model + "." + input_name
    io_utils.save(trainee, out_file + ".pkl")

    if export_params:
        trainee.export(out_file + ".params")
def apply_model(in_file, out_dir, model, trained_model, arg_space_files, alpha,
                beta, lambda_, out_format):
    """Apply a composition model to ``in_file`` and save the composed space.

    Args:
        in_file: path of the tuple list to compose (fields 0, 1, 2 are read).
        out_dir: directory prefix for the generated files.
        model: name passed to ``create_model``; None means "load the model
            stored in ``trained_model``".
        trained_model: path of a saved ``CompositionModel``, used only when
            ``model`` is None.
        arg_space_files: argument-space paths; the second entry is loaded
            only when exactly two paths are given.
        alpha, beta, lambda_: passed straight through to ``create_model``.
        out_format: if not None, the composed space is exported in this
            format in addition to being saved as ``.pkl``.
    """
    print("Reading in data...")
    # Text after the last "/" of the input path.
    base_name = in_file.rsplit("/", 1)[-1]

    model_obj = (io_utils.load(trained_model, CompositionModel)
                 if model is None
                 else create_model(model, alpha, beta, lambda_))
    model_name = type(model_obj).__name__

    primary = io_utils.load(arg_space_files[0], Space)
    secondary = (io_utils.load(arg_space_files[1], Space)
                 if len(arg_space_files) == 2
                 else None)

    tuples = io_utils.read_tuple_list(in_file, fields=[0, 1, 2])

    print("Applying composition model:%s" % model_name)
    # LexicalFunction models are composed against the first space alone.
    single_space = secondary is None or type(model_obj) is LexicalFunction
    compose_arg = primary if single_space else (primary, secondary)
    result_space = model_obj.compose(tuples, compose_arg)

    print("Printing...")
    target = ".".join((out_dir + "/COMPOSED_SS", model_name, base_name))
    io_utils.save(result_space, "%s.pkl" % target)

    if out_format is None:
        return
    result_space.export(target, format=out_format)
Beispiel #4
0
def train_model(in_file, out_dir, model, arg_space_files, phrase_space_file, regression,
                crossvalid, intercept, param, param_range, export_params):
    """Fit the requested composition model and write it to ``out_dir``.

    Args:
        in_file: training tuple list; fields 0, 1 and 2 are read.
        out_dir: directory prefix for the generated files.
        model: "weighted_add", "full_add", "lexical_func" or "dilation".
        arg_space_files: argument-space paths; a second space is loaded only
            when exactly two paths are given.
        phrase_space_file: path of the phrase space passed to ``train``.
        regression: "ridge" or "lstsq" picks the learner for FullAdditive and
            LexicalFunction; other values leave the model on its defaults.
        crossvalid, param, param_range: ridge learner settings.
        intercept: setting shared by both learners.
        export_params: truthy to also write ``<out_file>.params``.

    Raises:
        ValueError: when ``model`` is not a supported name (raised after the
            spaces have been loaded).
    """
    # print(...) with a single argument emits the same text whether print is
    # a statement or a function.
    print("Reading in data...")
    in_descr = in_file.rsplit("/", 1)[-1]

    model_dict = dict(weighted_add=WeightedAdditive,
                      full_add=FullAdditive,
                      lexical_func=LexicalFunction,
                      dilation=Dilation)
    learner_dict = dict(ridge=RidgeRegressionLearner,
                        lstsq=LstsqRegressionLearner)

    arg_space = io_utils.load(arg_space_files[0], Space)
    arg_space2 = (io_utils.load(arg_space_files[1], Space)
                  if len(arg_space_files) == 2 else None)
    phrase_space = io_utils.load(phrase_space_file, Space)

    # Every value in model_dict is a class, so None can only mean "unknown".
    model_cls = model_dict.get(model)
    if model_cls is None:
        raise ValueError("Invalid model:%s for training" % model)

    # Build a learner only for models that accept one and only for the two
    # recognised regression names; otherwise the model keeps its defaults.
    learner = None
    if model_cls not in (WeightedAdditive, Dilation):
        if regression == "ridge":
            learner = learner_dict[regression](crossvalidation=crossvalid,
                                               intercept=intercept,
                                               param=param,
                                               param_range=param_range)
        elif regression == "lstsq":
            learner = learner_dict[regression](intercept=intercept)

    if learner is None:
        model_obj = model_cls()
    else:
        model_obj = model_cls(learner=learner)

    train_data = io_utils.read_tuple_list(in_file, fields=[0, 1, 2])

    print("Training %s model" % model)
    # The lexical function model only ever sees the first argument space.
    if arg_space2 is None or model == "lexical_func":
        train_spaces = arg_space
    else:
        train_spaces = (arg_space, arg_space2)
    model_obj.train(train_data, train_spaces, phrase_space)

    print("Printing...")
    out_file = ".".join((out_dir + "/TRAINED_COMP_MODEL", model, in_descr))
    io_utils.save(model_obj, "%s.pkl" % out_file)

    if export_params:
        model_obj.export("%s.params" % out_file)
from composes.composition.multiplicative import Multiplicative
from composes.transformation.scaling.row_normalization import RowNormalization
import numpy as np
import sys

# Load the space named by the first command-line argument and apply
# RowNormalization to it.
my_space = io_utils.load(sys.argv[1])
my_space = my_space.apply(RowNormalization())

add = WeightedAdditive(alpha=1, beta=1)
# NOTE(review): `mult` and `lengths` are never used in the visible script;
# kept so that code outside this view relying on them keeps working.
mult = Multiplicative()

# The word pairs to compose come from the file named by the second argument
# (fields 0 and 1 of every entry).
fname = sys.argv[2]
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])

lengths = []
for wp in word_pairs:
    # Look both words up first so that a pair with a word missing from the
    # space is skipped. Previously a single `found` flag was set to True
    # before the loop and never reset, so one missing word silently disabled
    # every pair that followed it; now only the offending pair is skipped.
    try:
        v1 = my_space.get_row(wp[0])
        v2 = my_space.get_row(wp[1])
    except KeyError:
        continue

    # Compose the pair additively under a fixed placeholder name, then list
    # the 10 nearest neighbours of that composed row in the original space.
    composed_space = add.compose([(wp[0], wp[1], "_composed_")], my_space)
    neighbours = composed_space.get_neighbours("_composed_", 10,
                                               CosSimilarity(),
                                               space2=my_space)
    # Formatted into one string so the output is "w1 w2" whether print is a
    # statement or a function.
    print("%s %s" % (wp[0], wp[1]))
    print(neighbours)
Beispiel #6
0
# Script fragment: `space` and `data_path` are defined before this point,
# outside the visible part of the file.

# Replace `space` with the result of applying TopFeatureSelection(2000)
# (presumably keeps the 2000 top-ranked features -- confirm in the library).
print "Applying feature selection..."
space = space.apply(TopFeatureSelection(2000))

# Replace `space` again with the result of applying Svd(100)
# (presumably a reduction to 100 dimensions -- confirm in the library).
print "Applying SVD..."
space = space.apply(Svd(100))

# Build a peripheral space on top of the transformed `space` from the "sm"
# format data file and its column file.
print "Creating peripheral space.."
per_space = PeripheralSpace.build(space,
                                  data=data_path + "per.raw.SV.sm",
                                  cols=data_path + "per.raw.SV.cols",
                                  format="sm")

# Read the training tuples (fields 0, 1 and 2 of every entry).
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

# Train a LexicalFunction model with a ridge regression learner (param=2),
# using `space` as argument space and `per_space` as phrase space.
print "Training Lexical Function composition model..."
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

# Compose the test phrases with the trained model against `space`.
print "Composing phrases..."
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

# Read the similarity test set: fields 0 and 1 give the pairs to compare and
# field 2 of the same file is read as `gold` (presumably the reference
# scores -- confirm against the data file).
print "Reading similarity test data..."
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)
Beispiel #7
0
#ex20.py
#-------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity

# Load the space stored at data/out/ex01.pkl.
my_space = io_utils.load("data/out/ex01.pkl")

# Compute the cosine similarity of every word pair listed in the input file
# (fields 0 and 1 of each entry).
fname = "data/in/word_sims.txt"
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])
predicted = my_space.get_sims(word_pairs, CosSimilarity())

# Field 2 of the same file holds the values the predictions are scored
# against; report both correlation measures, label first, score second.
gold = io_utils.read_list(fname, field=2)
for label, measure in (("Spearman", "spearman"), ("Pearson", "pearson")):
    # Single-argument print(...) emits the same text whether print is a
    # statement or a function.
    print(label)
    print(scoring_utils.score(gold, predicted, measure))
Beispiel #8
0
# Script fragment: `space` and `data_path` come from earlier code that is not
# visible here.

# Apply TopFeatureSelection(2000) to `space` and keep the result
# (presumably the 2000 top-ranked features -- confirm in the library).
print "Applying feature selection..."
space = space.apply(TopFeatureSelection(2000))

# Apply Svd(100) to the feature-selected space
# (presumably reduces it to 100 dimensions -- confirm in the library).
print "Applying SVD..."
space = space.apply(Svd(100))

# Build the peripheral space relative to `space` from the "sm" format data
# file and the matching column file.
print "Creating peripheral space.."
per_space = PeripheralSpace.build(space,
                                  data = data_path + "per.raw.SV.sm",
                                  cols = data_path + "per.raw.SV.cols",
                                  format = "sm"                                
                                  )

# Read the training tuples (fields 0, 1 and 2 of every entry).
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0,1,2])

# Train a LexicalFunction model whose learner is ridge regression with
# param=2; `space` is the argument space, `per_space` the phrase space.
print "Training Lexical Function composition model..."
comp_model = LexicalFunction(learner = RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

# Compose the test phrases with the trained model against `space`.
print "Composing phrases..."
test_phrases_file = data_path + "ML08nvs_test.txt" 
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0,1,2])
composed_space = comp_model.compose(test_phrases, space)

# Read the similarity test set: pairs from fields 0 and 1, and field 2 of the
# same file as `gold` (presumably the reference scores -- confirm against
# the data file).
print "Reading similarity test data..."
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0,1])
gold = io_utils.read_list(test_similarity_file, field=2)