Example #1
    def iterate_embeddings(self):
        data_ud = util.read_data(self.input_name_base % (self.mode, 'ud'))
        data_embeddings = util.read_data(self.input_name_base %
                                         (self.mode, self.representation))

        for (sentence_ud, _), sentence_emb in zip(data_ud, data_embeddings):
            yield sentence_ud, sentence_emb
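
A minimal usage sketch for the generator above; the `loader` instance (and how it is constructed) is an assumption for illustration, not part of the example:

for sentence_ud, sentence_emb in loader.iterate_embeddings():
    # one list of UD tokens per sentence, paired with its embeddings
    print(len(sentence_ud), type(sentence_emb))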
Example #2
    def load_data(self):
        data_ud = util.read_data(self.input_name_base % (self.mode, 'ud'))
        data_embeddings = util.read_data(self.input_name_base %
                                         (self.mode, self.representation))

        x_raw, y_raw = [], []
        for (sentence_ud, words), (sentence_emb,
                                   _) in zip(data_ud, data_embeddings):
            for i, token in enumerate(sentence_ud):
                pos_tag = token['pos']

                # skip tokens with missing ("_") or unknown ("X") POS tags
                if pos_tag == "_" or pos_tag == "X":
                    continue

                x_raw += [sentence_emb[i]]
                y_raw += [pos_tag]

        x_raw = np.array(x_raw)
        y_raw = np.array(y_raw)

        return x_raw, y_raw
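
The arrays returned above feed directly into a standard classifier. A minimal probing sketch, assuming a `loader` instance that exposes load_data() and an available scikit-learn install (the probe is our addition, not part of the example):

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

x_raw, y_raw = loader.load_data()  # `loader` is hypothetical
x_train, x_test, y_train, y_test = train_test_split(
    x_raw, y_raw, test_size=0.2, random_state=0)
probe = LogisticRegression(max_iter=1000).fit(x_train, y_train)
print('POS probe accuracy: %.3f' % probe.score(x_test, y_test))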
Example #3
    def load_data(self):
        data_ud = util.read_data(self.input_name_base % (self.mode, 'ud'))
        data_embeddings = util.read_data(self.input_name_base % (self.mode, self.representation))

        x_raw, y_raw = [], []
        for (sentence_ud, words), (sentence_emb, _) in zip(data_ud, data_embeddings):
            for i, token in enumerate(sentence_ud):
                head = token['head']
                rel = token['rel']

                # skip unlabeled tokens and the root (whose head index is 0)
                if rel == "_" or rel == "root":
                    continue

                # UD head indices are 1-based, so shift by one for array lookup
                x_raw_tail = sentence_emb[i]
                x_raw_head = sentence_emb[head - 1]

                x_raw += [np.concatenate([x_raw_tail, x_raw_head])]
                y_raw += [rel]

        x_raw = np.array(x_raw)
        y_raw = np.array(y_raw)

        return x_raw, y_raw
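
A toy illustration of the tail/head concatenation above, using hypothetical 3-dimensional one-hot "embeddings":

import numpy as np

sentence_emb = np.eye(3)          # three tokens, one vector each
head = 2                          # this token's head is word 2 (1-based)
pair = np.concatenate([sentence_emb[0], sentence_emb[head - 1]])
print(pair.shape)                 # (6,): tail vector then head vector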
Example #4
    def load_data_index(self):
        data_ud = util.read_data(self.input_name_base % (self.mode, 'ud'))

        x_raw, y_raw = [], []
        for sentence_ud, words in data_ud:
            for i, token in enumerate(sentence_ud):
                pos_tag = token['pos']

                if pos_tag == "_" or pos_tag == "X":
                    continue

                x_raw += [words[i]]
                y_raw += [pos_tag]

        x_raw = np.array(x_raw)
        y_raw = np.array(y_raw)

        return x_raw, y_raw
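
The index variant returns word ids rather than vectors, so it pairs naturally with an embedding lookup. A minimal sketch, assuming PyTorch is available and that the word indices are integer ids; the vocabulary and embedding sizes are made up:

import torch
import torch.nn as nn

x_raw, y_raw = loader.load_data_index()  # `loader` is hypothetical
embedding = nn.Embedding(num_embeddings=10000, embedding_dim=300)  # sizes assumed
x_vectors = embedding(torch.as_tensor(x_raw, dtype=torch.long))
print(x_vectors.shape)  # (num_tokens, 300)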
Example #5
def load_losses(lang, model_path, keep_eos=False):
    fname = 'losses.pckl'
    results_file = '%s/%s' % (model_path, fname)
    results = util.read_data(results_file)

    # losses are stored both with and without the end-of-sentence token
    if not keep_eos:
        loss_value = 'losses_no_eos'
    else:
        loss_value = 'losses'

    # divide by log(2) to convert the losses from nats to bits
    loss = results['test'][loss_value].cpu().numpy() / math.log(2)
    y_values = results['test']['y_values'].cpu().numpy()
    if not keep_eos:
        # zero out end-of-sentence positions (id 2 appears to be <eos>)
        mask = (y_values == 2)
        loss[mask] = 0
        y_values[mask] = 0

    # sentence lengths count the non-padding positions (id 0 is padding)
    lengths = (y_values != 0).sum(1)
    if keep_eos:
        lengths = lengths - 1

    return loss, y_values, lengths
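
Since the per-token losses come out in bits (the division by math.log(2)), a common follow-up is an average per sentence. A minimal sketch with illustrative arguments:

loss, y_values, lengths = load_losses('en', 'checkpoints/en')  # paths are made up
bits_per_sentence = loss.sum(axis=1) / lengths
print(bits_per_sentence.mean())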
Example #6
    def load_data_index(self):
        data_ud = util.read_data(self.input_name_base % (self.mode, 'ud'))

        x_raw, y_raw = [], []
        for sentence_ud, words in data_ud:
            for i, token in enumerate(sentence_ud):
                head = token['head']
                rel = token['rel']

                if rel == "_" or rel == "root":
                    continue

                x_raw_tail = words[i]
                x_raw_head = words[head - 1]

                x_raw += [[x_raw_tail, x_raw_head]]
                y_raw += [rel]

        x_raw = np.array(x_raw)
        y_raw = np.array(y_raw)

        return x_raw, y_raw
Example #7
    def iterate_index(self):
        data_ud = util.read_data(self.input_name_base % (self.mode, 'ud'))

        for (sentence_ud, words) in data_ud:
            yield sentence_ud, np.array(words)
Example #8
    parser.add_argument('--epochs-test',
                        type=int,
                        default=30,
                        metavar='N',
                        help='number of epochs to test [default: 30]')
    parser.add_argument('--lamb',
                        type=float,
                        default=1,
                        help='trade-off parameter [default: 1]')
    parser.add_argument('--missing-rate',
                        type=float,
                        default=0,
                        help='view missing rate [default: 0]')
    args = parser.parse_args()

    # read data
    trainData, testData, view_num = read_data('./data/PIE_face_10.mat', 0.8, 1)
    outdim_size = [trainData.data[str(i)].shape[1] for i in range(view_num)]
    # set layer size
    layer_size = [[300, outdim_size[i]] for i in range(view_num)]
    # set parameter
    epoch = [args.epochs_train, args.epochs_test]
    learning_rate = [0.01, 0.01]
    # Randomly generated missing matrix
    Sn = get_sn(view_num, trainData.num_examples + testData.num_examples,
                args.missing_rate)
    Sn_train = Sn[np.arange(trainData.num_examples)]
    Sn_test = Sn[np.arange(testData.num_examples) + trainData.num_examples]

    Sn = torch.LongTensor(Sn).cuda()
    Sn_train = torch.LongTensor(Sn_train).cuda()
    Sn_test = torch.LongTensor(Sn_test).cuda()
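
get_sn itself is not shown in these examples. A plausible sketch of such a view-availability matrix (1 = view observed, 0 = missing), under the assumption that every sample must keep at least one view; this is our approximation, not the original function:

import numpy as np

def get_sn_sketch(view_num, sample_num, missing_rate, seed=0):
    rng = np.random.default_rng(seed)
    # drop each view independently with probability missing_rate
    sn = (rng.random((sample_num, view_num)) >= missing_rate).astype(np.int64)
    empty = sn.sum(axis=1) == 0  # samples that lost every view
    sn[empty, rng.integers(view_num, size=empty.sum())] = 1  # restore one view
    return sn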
Example #9
    parser.add_argument('--epochs-test',
                        type=int,
                        default=20,
                        metavar='N',
                        help='number of epochs to test [default: 20]')
    parser.add_argument('--lamb',
                        type=float,
                        default=1,
                        help='trade-off parameter [default: 1]')
    parser.add_argument('--missing-rate',
                        type=float,
                        default=0,
                        help='view missing rate [default: 0]')
    args = parser.parse_args()

    # read data
    trainData, testData, view_num = read_data('./data/yaleB_mtv.mat', 0.8, 1)
    outdim_size = [trainData.data[str(i)].shape[1] for i in range(view_num)]
    # set layer size
    layer_size = [[350, outdim_size[i]] for i in range(view_num)]
    # set parameter
    epoch = [args.epochs_train, args.epochs_test]
    learning_rate = [0.01, 0.01]
    # Randomly generated missing matrix
    Sn = get_sn(view_num, trainData.num_examples + testData.num_examples,
                args.missing_rate)
    Sn_train = Sn[np.arange(trainData.num_examples)]
    Sn_test = Sn[np.arange(testData.num_examples) + trainData.num_examples]
    # Model building
    model = CPMNets(view_num,
                    trainData.num_examples,
                    testData.num_examples,
Example #10
def load_data(fname):
    return util.read_data(fname)
Example #11
    parser.add_argument('--epochs-test',
                        type=int,
                        default=300,
                        metavar='N',
                        help='number of epochs to test [default: 300]')
    parser.add_argument('--lamb',
                        type=float,
                        default=10,
                        help='trade-off parameter [default: 10]')
    parser.add_argument('--missing-rate',
                        type=float,
                        default=0.5,
                        help='view missing rate [default: 0.5]')
    args = parser.parse_args()

    # read data
    trainData, testData, view_num = read_data(
        './data/cub_googlenet_doc2vec_c10.mat', 0.8, 1)
    outdim_size = [trainData.data[str(i)].shape[1] for i in range(view_num)]
    # set layer size
    layer_size = [[outdim_size[i]] for i in range(view_num)]
    # set parameter
    epoch = [args.epochs_train, args.epochs_test]
    learning_rate = [0.001, 0.01]
    # Randomly generated missing matrix
    Sn = get_sn(view_num, trainData.num_examples + testData.num_examples,
                args.missing_rate)
    Sn_train = Sn[np.arange(trainData.num_examples)]
    Sn_test = Sn[np.arange(testData.num_examples) + trainData.num_examples]
    # Model building
    model = CPMNets(view_num, trainData.num_examples, testData.num_examples,
                    layer_size, args.lsd_dim, learning_rate, args.lamb)
    # train
Example #12
    parser.add_argument('--epochs-test',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to test [default: 100]')
    parser.add_argument('--lamb',
                        type=float,
                        default=10.,
                        help='trade-off parameter [default: 10]')
    parser.add_argument('--missing-rate',
                        type=float,
                        default=0,
                        help='view missing rate [default: 0]')
    args = parser.parse_args()

    # read data
    trainData, testData, view_num = read_data('./data/animal.mat', 0.8, 1)
    outdim_size = [trainData.data[str(i)].shape[1] for i in range(view_num)]
    # set layer size
    layer_size = [[outdim_size[i]] for i in range(view_num)]
    # set parameter
    epoch = [args.epochs_train, args.epochs_test]
    learning_rate = [0.001, 0.01]
    # Randomly generated missing matrix
    Sn = get_sn(view_num, trainData.num_examples + testData.num_examples,
                args.missing_rate)
    Sn_train = Sn[np.arange(trainData.num_examples)]
    Sn_test = Sn[np.arange(testData.num_examples) + trainData.num_examples]
    # Model building
    model = CPMNets(view_num, trainData.num_examples, testData.num_examples,
                    layer_size, args.lsd_dim, learning_rate, args.lamb)
    # train