Ejemplo n.º 1
0
# mapping from trial identifiers to corresponding input filenames
trial_input_fnames = id2filenames(trial_dir, "input", trial_ids)

# mapping from trial identifiers to corresponding gold standard filenames
trial_gs_fnames = id2filenames(trial_dir, "gs", trial_ids)

# directory, identifiers and filename mappings for the STS 2014 test data
test_dir = join(data_dir, "STS2014-test")
# the test data reuses the identifiers defined for the trial data
test_ids = trial_ids

# mappings from test identifiers to the corresponding input filenames
# and gold standard filenames
test_input_fnames = id2filenames(test_dir, "input", test_ids)
test_gs_fnames = id2filenames(test_dir, "gs", test_ids)

# mapping from test dataset identifiers and feature names
# to the corresponding feature files
test_feat_fnames = map_id_to_feat_files(os.path.join(feat_dir, 'STS2014-test'),
                                        test_ids)


def read_test_data(ids, features=None, convert_nan=True):
    """
    Create feature vectors and labels for given dataset identifiers and
    features from STS14 test data.

    Delegates to read_data, supplying this module's test_feat_fnames and
    test_gs_fnames mappings.

    ids: dataset identifiers to read
    features: feature names, forwarded to read_data; when omitted, a new
        empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_data (default True)

    Returns the result of read_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_data(ids,
                     test_feat_fnames,
                     test_gs_fnames,
                     features=features,
                     convert_nan=convert_nan)


def read_blind_test_data(ids, features=[], convert_nan=True):
Ejemplo n.º 2
0
define dirs, ids and filenames for STS13 test data
"""
import os

from os.path import join
from ntnu.io import read_data, map_id_to_feat_files, feat_dir

from sts.io import data_dir, id2filenames

# directory containing original STS 2013 test files
test_dir = join(data_dir, "STS2013-test")

# identifiers for different categories of test data
test_ids = "FNWN", "headlines", "OnWN", "SMT"

# mapping from test identifiers to corresponding input filenames
test_input_fnames = id2filenames(test_dir, "input", test_ids)

# mapping from test identifiers to corresponding gold standard filenames
test_gs_fnames = id2filenames(test_dir, "gs", test_ids)

# mapping from test dataset identifiers and feature names
# to the corresponding feature files
test_feat_fnames = map_id_to_feat_files(os.path.join(feat_dir, 'STS2013-test'), test_ids)

def read_test_data(ids, features=None, convert_nan=True):
    """
    Create feature vectors and labels for given dataset identifiers and
    features from STS13 test data.

    Delegates to read_data, supplying this module's test_feat_fnames and
    test_gs_fnames mappings.

    ids: dataset identifiers to read
    features: feature names, forwarded to read_data; when omitted, a new
        empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_data (default True)

    Returns the result of read_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_data(ids, test_feat_fnames, test_gs_fnames,
                     features=features, convert_nan=convert_nan)
Ejemplo n.º 3
0
"""
define dirs and filenames of features for STS12 data
"""

from os.path import join

from sts.sts12 import train_ids, test_ids, train_gs_fnames, test_gs_fnames

from ntnu.io import feat_dir, read_data, map_id_to_feat_files

# top directory containing the STS 2012 train feature files
train_dir = join(feat_dir, "STS2012-train")

# mapping from train dataset identifiers and feature names
# to the corresponding feature files under train_dir
train_feat_fnames = map_id_to_feat_files(train_dir, train_ids)


def read_train_data(ids, features=None, convert_nan=True):
    """
    Create feature vectors and labels for given dataset identifiers and
    features from STS12 train data.

    Delegates to read_data, supplying this module's train_feat_fnames and
    train_gs_fnames mappings.

    ids: dataset identifiers to read
    features: feature names, forwarded to read_data; when omitted, a new
        empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_data (default True)

    Returns the result of read_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_data(ids,
                     train_feat_fnames,
                     train_gs_fnames,
                     features=features,
                     convert_nan=convert_nan)


# top directory containing test feature files
Ejemplo n.º 4
0
define dirs and filenames of features for STS14 data
"""

from os.path import join

from sts.sts14 import trial_ids, trial_gs_fnames

from ntnu.io import feat_dir, read_data, read_blind_data, map_id_to_feat_files


# top directory containing the STS 2014 trial feature files
trial_dir = join(feat_dir, "STS2014-trial")

# mapping from trial dataset identifiers and feature names
# to the corresponding feature files under trial_dir
trial_feat_fnames = map_id_to_feat_files(trial_dir, trial_ids)


def read_trial_data(ids, features=None, convert_nan=True):
    """
    Create feature vectors and labels for given dataset identifiers and
    features from STS14 trial data.

    Delegates to read_data, supplying this module's trial_feat_fnames and
    trial_gs_fnames mappings.

    ids: dataset identifiers to read
    features: feature names, forwarded to read_data; when omitted, a new
        empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_data (default True)

    Returns the result of read_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_data(ids, trial_feat_fnames, trial_gs_fnames,
                     features=features, convert_nan=convert_nan)


def read_blind_trial_data(ids, features=None, convert_nan=True):
    """
    Read STS14 trial data for given dataset identifiers and features
    without using gold standard files.

    Delegates to read_blind_data, supplying only this module's
    trial_feat_fnames mapping (no gold standard filenames are passed).

    ids: dataset identifiers to read
    features: feature names, forwarded to read_blind_data; when omitted,
        a new empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_blind_data (default True)

    Returns the result of read_blind_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_blind_data(ids, trial_feat_fnames, features=features,
                           convert_nan=convert_nan)
Ejemplo n.º 5
0
"""

from os.path import join

from sts.sts12 import train_ids, test_ids, train_gs_fnames, test_gs_fnames

from ntnu.io import feat_dir, read_data, map_id_to_feat_files



# top directory containing the STS 2012 train feature files
train_dir = join(feat_dir, "STS2012-train")

# mapping from train dataset identifiers and feature names
# to the corresponding feature files under train_dir
train_feat_fnames = map_id_to_feat_files(train_dir, train_ids)

def read_train_data(ids, features=None, convert_nan=True):
    """
    Create feature vectors and labels for given dataset identifiers and
    features from STS12 train data.

    Delegates to read_data, supplying this module's train_feat_fnames and
    train_gs_fnames mappings.

    ids: dataset identifiers to read
    features: feature names, forwarded to read_data; when omitted, a new
        empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_data (default True)

    Returns the result of read_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_data(ids, train_feat_fnames, train_gs_fnames,
                     features=features, convert_nan=convert_nan)



# top directory containing test feature files
test_dir = join(feat_dir, "STS2012-test")

# mapping from test dataset identifiers and feature names 
Ejemplo n.º 6
0
"""
define dirs and filenames of features for STS14 data
"""

from os.path import join

from sts.sts14 import trial_ids, trial_gs_fnames

from ntnu.io import feat_dir, read_data, read_blind_data, map_id_to_feat_files

# top directory containing the STS 2014 trial feature files
trial_dir = join(feat_dir, "STS2014-trial")

# mapping from trial dataset identifiers and feature names
# to the corresponding feature files under trial_dir
trial_feat_fnames = map_id_to_feat_files(trial_dir, trial_ids)


def read_trial_data(ids, features=None, convert_nan=True):
    """
    Create feature vectors and labels for given dataset identifiers and
    features from STS14 trial data.

    Delegates to read_data, supplying this module's trial_feat_fnames and
    trial_gs_fnames mappings.

    ids: dataset identifiers to read
    features: feature names, forwarded to read_data; when omitted, a new
        empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_data (default True)

    Returns the result of read_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_data(ids,
                     trial_feat_fnames,
                     trial_gs_fnames,
                     features=features,
                     convert_nan=convert_nan)


def read_blind_trial_data(ids, features=[], convert_nan=True):
Ejemplo n.º 7
0
define dirs and filenames of features for STS13 data
"""

from os.path import join

from sts.sts13 import test_ids, test_gs_fnames

from ntnu.io import feat_dir, read_data, read_blind_data, map_id_to_feat_files


# top directory containing the STS 2013 test feature files
test_dir = join(feat_dir, "STS2013-test")

# mapping from test dataset identifiers and feature names
# to the corresponding feature files under test_dir
test_feat_fnames = map_id_to_feat_files(test_dir, test_ids)


def read_test_data(ids, features=None, convert_nan=True):
    """
    Create feature vectors and labels for given dataset identifiers and
    features from STS13 test data.

    Delegates to read_data, supplying this module's test_feat_fnames and
    test_gs_fnames mappings.

    ids: dataset identifiers to read
    features: feature names, forwarded to read_data; when omitted, a new
        empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_data (default True)

    Returns the result of read_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_data(ids, test_feat_fnames, test_gs_fnames,
                     features=features, convert_nan=convert_nan)


def read_blind_test_data(ids, features=None, convert_nan=True):
    """
    Read STS13 test data for given dataset identifiers and features
    without using gold standard files.

    Delegates to read_blind_data, supplying only this module's
    test_feat_fnames mapping (no gold standard filenames are passed).

    ids: dataset identifiers to read
    features: feature names, forwarded to read_blind_data; when omitted,
        a new empty list is forwarded on every call
    convert_nan: forwarded unchanged to read_blind_data (default True)

    Returns the result of read_blind_data.
    """
    # None stands in for the former mutable default [], so that no single
    # list object is shared between calls
    if features is None:
        features = []

    return read_blind_data(ids, test_feat_fnames, features=features,
                           convert_nan=convert_nan)