def interactiveGRUTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',
        wordsEmbeddings=None,
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        typesEmbeddings=None,
        typesEmbeddings_path='',
        word_dimension=22,
        type_dimension=20,
        dimension=64,
        attention_dimension=12,
        wordsSize=1000000,
        subpaths_map=None,
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        sequences_map=None,
        sequences_file='',
        maxlen_subpaths=1000,
        maxlen=100,  # sequences longer than this are ignored
        batch_size=1,
        is_shuffle_for_batch=False,
        alpha=0.1,
        beta=0.1,
        gamma=0.1,
        objective_function_method='hinge-loss',
        objective_function_param=0,
        lrate=0.0001,
        max_epochs=10,
        dispFreq=5,
        saveFreq=5,
        saveto=main_dir + 'facebook/path2vec-modelParams.npz',
        decay=0.01,
):
    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)
    if typesEmbeddings is None:
        if typesEmbeddings_path is not None:
            typesEmbeddings, type_dimension, wordsSize = dataProcessTools.getTypesEmbeddings(
                typesEmbeddings_path)
        else:
            print 'There is no path for typesEmbeddings, exit!!!'
            exit(0)

    trainingData, trainingPairsData = dataProcessTools.getTrainingData(trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                      is_shuffle_for_batch)
    sequences_data = dataProcessTools.readAllSequencesFromFile(sequences_file)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    trainingParis, sequences_matrix, dependency_matrix, dependWeight_matrix, \
        sequencesLen_vector, discountSeq_matrix, discountForEachNode_matrix, \
        wordsEmbs, typesEmbs, masks_matrix, groups_tensor, cost = \
        interactiveGRULearningBatch.interactiveGRULearning(model_options, tparams)
    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(
        lr, tparams, grads, trainingParis, sequences_matrix, dependency_matrix,
        dependWeight_matrix, sequencesLen_vector, discountSeq_matrix,
        discountForEachNode_matrix, wordsEmbs, typesEmbs, masks_matrix,
        groups_tensor, cost)
    print 'Start training models ......'

    best_p = None
    history_cost = []

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairsData[i] for i in batch]
            trainingParis_data, sequences_matrix_data, dependency_matrix_data, \
                dependWeight_matrix_data, sequencesLen_vector_data, \
                discountSeq_matrix_data, discountForEachNode_matrix_data, \
                masks_matrix_data, groups_tensor_data = \
                dataProcessTools.prepareDataForTrainingBatch(
                    trainingDataForBatch, trainingPairsForBatch, sequences_data,
                    alpha, beta, gamma)
            if len(trainingParis_data) == 0:
                continue
            cost = f_grad_shared(
                trainingParis_data, sequences_matrix_data, dependency_matrix_data,
                dependWeight_matrix_data, sequencesLen_vector_data,
                discountSeq_matrix_data, discountForEachNode_matrix_data,
                wordsEmbeddings, typesEmbeddings, masks_matrix_data,
                groups_tensor_data)
            f_update(lrate)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'bad cost detected: ', cost
                return

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost

            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving... time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                                         time.localtime(time.time()))
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print 'Done'

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print 'Training finished! Cost time ==', end_time - start_time, 's'
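
# Illustrative sketch (not part of the original pipeline): how a checkpoint written by the
# save block above (numpy.savez for the parameters plus pickle.dump for model_options)
# could be read back. The helper name loadSavedModelParams and its return format are
# assumptions; only the module-level numpy and pickle imports already used above are relied on.
def loadSavedModelParams(saveto):
    """Reload the parameters and model_options saved during training."""
    saved = numpy.load(saveto)
    # everything except the history array is a model parameter
    params = dict((k, saved[k]) for k in saved.files if k != 'history_errs')
    with open('%s.pkl' % saveto, 'rb') as f:
        model_options = pickle.load(f)
    return params, model_options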
def proxEmbedBySubgraphs(
        trainingDataFile=main_dir + 'train_classmate',
        wordsEmbeddings_data=None,
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        subpaths_map=None,
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        subgraphSaveFile='',
        maxlen_subpaths=1000,
        wordsSize=1000000,
        maxlen=100,
        batch_size=1,
        is_shuffle_for_batch=False,
        dispFreq=5,
        saveFreq=5,
        saveto=main_dir + 'facebook/path2vec-modelParams.npz',
        lrate=0.0001,
        word_dimension=22,
        dimension=64,
        discount_alpha=0.3,
        discount_beta=0.3,
        h_output_method='max-pooling',
        objective_function_method='hinge-loss',
        objective_function_param=0,
        max_epochs=10,
        decay=0.01,
):
    model_options = locals().copy()

    if wordsEmbeddings_data is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings_data, word_dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            exit(0)
    trainingData, trainingPairs_data = dataProcessTools.getTrainingData(trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                      is_shuffle_for_batch)
    subgraphs = dataProcessTools.readAllSubgraphDependencyAndSequencesWithLengths(
        subgraphSaveFile)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    trainingPairs, sequences, masks, lengths, subgraph_lens, wordsEmbeddings, \
        buffer_tensor, nodesLens, cost = \
        proxEmbedBySubgraphModel.proxEmbedBySubgraphModel(model_options, tparams)
    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    this_time = time.time()
    print 'Start to compile and optimize, time ==', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(this_time))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingPairs, sequences,
                                       masks, lengths, subgraph_lens, wordsEmbeddings,
                                       buffer_tensor, nodesLens, cost)
    print 'Start training models ......'

    best_p = None
    history_cost = []

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs_data[i] for i in batch]
            tuples3DMatrix_data, x_data, mask_data, lens_data, subgraph_lens_data, \
                buffer_tensor_data, nodesLens_data = \
                dataProcessTools.generateSequenceAndMasksForSingleSequenceWithLength(
                    trainingDataForBatch, trainingPairsForBatch, subgraphs, dimension)
            cost = f_grad_shared(tuples3DMatrix_data, x_data, mask_data, lens_data,
                                 subgraph_lens_data, wordsEmbeddings_data,
                                 buffer_tensor_data, nodesLens_data)
            f_update(lrate)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'bad cost detected: ', cost
                return

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost
                this_time = time.time()
                print 'Time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(this_time))

            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...'
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print 'Done'
            gc.collect()

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print 'Training finished! Cost time ==', end_time - start_time, 's'
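
# A minimal sketch, for reference only, of a mini-batch index generator that is consistent
# with how allBatches is consumed above (`for _, batch in allBatches`). The real helper is
# dataProcessTools.get_minibatches_idx and may differ in details; this sketch is an assumption.
def _sketch_get_minibatches_idx(n, minibatch_size, shuffle=False):
    idx_list = numpy.arange(n, dtype='int64')
    if shuffle:
        numpy.random.shuffle(idx_list)
    minibatches = []
    start = 0
    while start < n:
        minibatches.append(idx_list[start:start + minibatch_size])
        start += minibatch_size
    # pairs of (batch index, list of sample indices)
    return zip(range(len(minibatches)), minibatches)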
def proxEmbedTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',  # full path of the training data file
        wordsEmbeddings=None,  # words embeddings
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',  # file path of the words embeddings
        word_dimension=22,  # dimension of the words embeddings
        dimension=64,  # dimension of the path embeddings
        wordsSize=1000000,  # size of the words vocabulary
        subpaths_map=None,  # contains the sub-paths
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',  # file which contains the sub-paths
        maxlen_subpaths=1000,  # max length of a sub-path
        h_output_method='mean-pooling',  # LSTM output for one path: "h" uses only the last hidden state, "mean-pooling" averages all h_i, "max-pooling" takes the element-wise max of all h_i
        maxlen=100,  # sequences longer than this are ignored
        batch_size=1,  # size of a training mini-batch
        is_shuffle_for_batch=False,  # whether to shuffle the training data when building batches
        discount_alpha=0.1,  # discount parameter alpha: the longer the sub-path, the smaller its weight
        subpaths_pooling_method='max-pooling',  # how to combine several sub-paths into one vector: "mean-pooling" or "max-pooling"
        objective_function_method='hinge-loss',  # loss function; hinge-loss is used here
        objective_function_param=0,  # parameter beta of the loss function
        lrate=0.0001,  # learning rate
        max_epochs=10,  # max number of training epochs
        dispFreq=5,  # display frequency
        saveFreq=5,  # frequency for saving the parameters
        saveto=main_dir + 'facebook/proxEmbed-modelParams.npz',  # path for saving the parameters; generated from main_dir, dataset_name, suffix, class_name and index
        # L2 regularization (weight decay) of the model parameters
        decay_lstm_W=0.01,
        decay_lstm_U=0.01,
        decay_lstm_b=0.01,
        decay_w=0.01,
):
    """
    The training stage of ProxEmbed
    """
    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)
    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(subpaths_file, maxlen_subpaths)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)
    trainingData, trainingPairs = dataProcessTools.getTrainingData(trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                      is_shuffle_for_batch)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    trainingParis, subPaths_matrix, subPaths_mask, subPaths_lens, wemb, cost = \
        proxEmbedModelMulti.proxEmbedModel(model_options, tparams)
    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingParis, subPaths_matrix,
                                       subPaths_mask, subPaths_lens, wemb, cost)
    print 'Start training models ......'

    best_p = None
    history_cost = []
    models_count = [0, 0, 0, 0]

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs[i] for i in batch]
            triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = \
                dataProcessTools.prepareDataForTraining(trainingDataForBatch,
                                                        trainingPairsForBatch, subpaths_map)
            cost = f_grad_shared(triples_matrix_data, subPaths_matrix_data,
                                 subPaths_mask_data, subPaths_lens_data, wordsEmbeddings)
            f_update(lrate)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'bad cost detected: ', cost
                return

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost
                print 'models_count ==', models_count

            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...'
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print 'Done'

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print 'Training finished! Cost time ==', end_time - start_time, 's'
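
# For reference: unzip(tparams) in the save block above is expected to pull the current
# numpy values out of the Theano shared variables so they can be passed to numpy.savez.
# The project defines unzip elsewhere; this is only a sketch of an implementation that is
# consistent with that use, not the project's own code.
def _sketch_unzip(zipped):
    from collections import OrderedDict
    new_params = OrderedDict()
    for key, value in zipped.items():
        new_params[key] = value.get_value()  # Theano shared variable -> numpy array
    return new_params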
def metagraphAttentionTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',  # full path of the training data file
        metagraphEmbeddings_path='',  # file path of the metagraph embeddings
        wordsEmbeddings_data=None,  # words embeddings
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',  # file path of the words embeddings
        wordsSize=1000000,  # size of the words vocabulary
        subpaths_map=None,  # contains the sub-paths
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',  # file which contains the sub-paths
        maxlen_subpaths=1000,  # max length of a sub-path
        maxlen=100,  # sequences longer than this are ignored
        batch_size=10,  # size of a training mini-batch
        is_shuffle_for_batch=True,  # whether to shuffle the training data when building batches
        objective_function_method='sigmoid',  # loss function; sigmoid is used here
        objective_function_param=0,  # parameter beta of the loss function
        lrate=0.0001,  # learning rate
        max_epochs=100,  # max number of training epochs
        dispFreq=5,  # display frequency
        saveFreq=5,  # frequency for saving the parameters
        saveto=main_dir + 'facebook/path2vec-modelParams.npz',  # path for saving the parameters; generated from main_dir, dataset_name, suffix, class_name and index
        # dimension parameters
        metagraph_embedding_dimension=10,  # metagraph embedding dimension
        dimension_A=10,  # attention dimension when computing the m-node embedding
        dimension_lstm=10,  # dimension of the LSTM parameters
        dimension_B=10,  # attention dimension when computing the m-path embedding
        dimension_C=10,  # attention dimension when computing the m-paths embedding
        # weight-decay parameters
        decay_Q_A=0.001,
        decay_b_A=0.001,
        decay_eta_A=0.001,
        decay_lstm_W=0.001,
        decay_lstm_U=0.001,
        decay_lstm_b=0.001,
        decay_Q_B=0.001,
        decay_b_B=0.001,
        decay_eta_B=0.001,
        decay_Q_C=0.001,
        decay_b_C=0.001,
        decay_eta_C=0.001,
        decay_w=0.001,
):
    # collect all parameters
    model_options = locals().copy()

    if wordsEmbeddings_data is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings_data, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)
    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPathsRomove0Path(
                subpaths_file, maxlen_subpaths, wordsEmbeddings_data)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)
    metagraphEmbedding_data, metagraphDimension, metagraphSize = \
        dataProcessTools.getMetagraphEmbeddings(metagraphEmbeddings_path)
    trainingData, trainingPairs_data = dataProcessTools.getTrainingData(trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                      is_shuffle_for_batch)

    # init shared variables
    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    metagraphEmbeddings, trainingParis, subPaths_matrix, subPaths_mask, wordsEmbeddings, cost = \
        subgraphAttentionModelLSTMBatch.metagraphAttentionModel(model_options, tparams)
    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    this_time = time.time()
    print 'Start to compile and optimize, time ==', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(this_time))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, metagraphEmbeddings,
                                       trainingParis, subPaths_matrix, subPaths_mask,
                                       wordsEmbeddings, cost)
    print 'Start training models ......'

    best_p = None
    history_cost = []  # not used

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            # prepare the data for this batch
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs_data[i] for i in batch]
            triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = \
                dataProcessTools.prepareDataForTraining(trainingDataForBatch,
                                                        trainingPairsForBatch, subpaths_map)
            cost = f_grad_shared(metagraphEmbedding_data, triples_matrix_data,
                                 subPaths_matrix_data, subPaths_mask_data,
                                 wordsEmbeddings_data)
            f_update(lrate)

            # release the batch data
            trainingDataForBatch = None
            trainingPairsForBatch = None
            del triples_matrix_data
            del subPaths_matrix_data
            del subPaths_mask_data
            del subPaths_lens_data

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'bad cost detected: ', cost
                return

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost
                this_time = time.time()
                print 'Time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(this_time))

            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...'
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print 'Done'
            gc.collect()

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print 'Training finished! Cost time ==', end_time - start_time, 's'
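
# Illustrative driver (never called anywhere in this module): one way the training above
# could be invoked. The path values are placeholders, not files that ship with the
# repository, and should be replaced with the local data layout.
def _example_metagraphAttentionTraining_run():
    metagraphAttentionTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',
        metagraphEmbeddings_path=main_dir + 'facebook/metagraphEmbeddings',
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        batch_size=10,
        max_epochs=100,
        saveto=main_dir + 'facebook/metagraphAttention-modelParams.npz')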
def proxEmbedTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',  # full path of the training data file
        wordsEmbeddings=None,  # words embeddings
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',  # file path of the words embeddings
        word_dimension=22,  # dimension of the words embeddings
        dimension=64,  # dimension of the path embeddings
        wordsSize=1000000,  # size of the words vocabulary
        subpaths_map=None,  # contains the sub-paths
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',  # file which contains the sub-paths
        maxlen_subpaths=1000,  # max length of a sub-path
        h_output_method='mean-pooling',  # LSTM output for one path: "h" uses only the last hidden state, "mean-pooling" averages all h_i, "max-pooling" takes the element-wise max of all h_i
        maxlen=100,  # sequences longer than this are ignored
        batch_size=1,  # size of a training mini-batch
        is_shuffle_for_batch=False,  # whether to shuffle the training data when building batches
        discount_alpha=0.1,  # discount parameter alpha: the longer the sub-path, the smaller its weight
        subpaths_pooling_method='max-pooling',  # how to combine several sub-paths into one vector: "mean-pooling" or "max-pooling"
        objective_function_method='hinge-loss',  # loss function; hinge-loss is used here
        objective_function_param=0,  # parameter beta of the loss function
        lrate=0.0001,  # learning rate
        max_epochs=10,  # max number of training epochs
        dispFreq=5,  # display frequency
        saveFreq=5,  # frequency for saving the parameters
        saveto=main_dir + 'facebook/proxEmbed-modelParams.npz',  # path for saving the parameters; overwritten per group from main_dir, dataset_name, suffix, class_name and index
        # L2 regularization (weight decay) of the model parameters
        decay_lstm_W=0.01,
        decay_lstm_U=0.01,
        decay_lstm_b=0.01,
        decay_w=0.01,
        num_group=0,  # number of training splits (groups) to train on
        dataset_name="",
        class_name="",
        main_dir="",
):
    """
    The training stage of ProxEmbed
    """
    model_options = locals().copy()
    model_options.pop('wordsEmbeddings')
    print model_options

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
            # print("wordsEmbeddings:", wordsEmbeddings.shape, dimension, wordsSize)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)
    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(subpaths_file, maxlen_subpaths)
            # print("subpaths_map:", len(subpaths_map))
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)

    cost_time = []
    for num_of_group in range(num_group):
        num_of_group += 1
        suffix = str(num_of_group)
        index = str(num_of_group)
        trainingDataFile = os.path.join(main_dir + '/', dataset_name + '.splits',
                                        'train.' + suffix, 'train_' + class_name + '_' + '1')
        saveto = os.path.join(main_dir + '/', dataset_name + '.trainModels',
                              'train.' + suffix, 'train_' + class_name + '_' + index + '.npz')
        trainingData, trainingPairs = dataProcessTools.getTrainingData(trainingDataFile)
        allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                          is_shuffle_for_batch)

        params = init_sharedVariables(model_options)
        tparams = init_tparams(params)
        print 'Generate models ......'
        trainingParis, subPaths_matrix, subPaths_mask, subPaths_lens, wemb, cost = \
            proxEmbedModelMulti.proxEmbedModel(model_options, tparams)
        print 'trainingParis:', type(trainingParis), trainingParis.shape
        print 'Generate gradients ......'
        grads = tensor.grad(cost, wrt=list(tparams.values()))
        print 'Using Adadelta to generate functions ......'
        lr = tensor.scalar(name='lr')
        f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingParis,
                                           subPaths_matrix, subPaths_mask,
                                           subPaths_lens, wemb, cost)
        print 'Start training models ......'

        best_p = None
        history_cost = []
        models_count = [0, 0, 0, 0]

        start_time = time.time()
        print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
        uidx = 0
        for eidx in range(max_epochs):
            for _, batch in allBatches:
                uidx += 1
                trainingDataForBatch = [trainingData[i] for i in batch]
                trainingPairsForBatch = [trainingPairs[i] for i in batch]
                triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = \
                    dataProcessTools.prepareDataForTraining(trainingDataForBatch,
                                                            trainingPairsForBatch, subpaths_map)
                cost = f_grad_shared(triples_matrix_data, subPaths_matrix_data,
                                     subPaths_mask_data, subPaths_lens_data, wordsEmbeddings)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'bad cost detected: ', cost
                    return

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost
                    print 'models_count ==', models_count

                if saveto and numpy.mod(uidx, saveFreq) == 0:
                    print 'Saving...'
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_cost, **params)
                    pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                    print 'Done'

        end_time = time.time()
        print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
        print 'Training finished! Cost time ==', end_time - start_time, 's'
        cost_time.append(end_time - start_time)
    return cost_time
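
# Example invocation of the multi-group training above (illustrative only; the argument
# values are placeholders based on how trainingDataFile and saveto are built in the loop:
# <main_dir>/<dataset>.splits/train.<g>/train_<class>_1 for input and
# <main_dir>/<dataset>.trainModels/train.<g>/train_<class>_<g>.npz for output).
#
# cost_time = proxEmbedTraining(
#     wordsEmbeddings_path='data/facebook/nodesFeatures',
#     subpaths_file='data/facebook/subpathsSaveFile',
#     num_group=10,
#     dataset_name='facebook',
#     class_name='classmate',
#     main_dir='data')
# print 'per-group training time in seconds ==', cost_time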