Example #1
def interactiveGRUTraining(
    trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',
    wordsEmbeddings=None,
    wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
    typesEmbeddings=None,
    typesEmbeddings_path='',
    word_dimension=22,
    type_dimension=20,
    dimension=64,
    attention_dimension=12,
    wordsSize=1000000,
    subpaths_map=None,
    subpaths_file=main_dir + 'facebook/subpathsSaveFile',
    sequences_map=None,
    sequences_file='',
    maxlen_subpaths=1000,
    maxlen=100,  # Sequences longer than this are ignored
    batch_size=1,
    is_shuffle_for_batch=False,
    alpha=0.1,
    beta=0.1,
    gamma=0.1,
    objective_function_method='hinge-loss',
    objective_function_param=0,
    lrate=0.0001,
    max_epochs=10,
    dispFreq=5,
    saveFreq=5,
    saveto=main_dir + 'facebook/path2vec-modelParams.npz',
    decay=0.01,
):
    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'Exit...'
            exit(0)
    if typesEmbeddings is None:
        if typesEmbeddings_path is not None:
            typesEmbeddings, type_dimension, wordsSize = dataProcessTools.getTypesEmbeddings(
                typesEmbeddings_path)
        else:
            print 'Exit...'
            exit(0)

    trainingData, trainingPairsData = dataProcessTools.getTrainingData(
        trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData),
                                                      batch_size,
                                                      is_shuffle_for_batch)

    sequences_data = dataProcessTools.readAllSequencesFromFile(sequences_file)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    trainingParis, sequences_matrix, dependency_matrix, dependWeight_matrix, sequencesLen_vector, discountSeq_matrix, discountForEachNode_matrix, wordsEmbs, typesEmbs, masks_matrix, groups_tensor, cost = interactiveGRULearningBatch.interactiveGRULearning(
        model_options, tparams)

    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(
        lr, tparams, grads, trainingParis, sequences_matrix, dependency_matrix,
        dependWeight_matrix, sequencesLen_vector, discountSeq_matrix,
        discountForEachNode_matrix, wordsEmbs, typesEmbs, masks_matrix,
        groups_tensor, cost)

    print 'Start training models ......'
    best_p = None
    history_cost = []

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                         time.localtime(time.time()))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairsData[i] for i in batch]
            trainingParis_data, sequences_matrix_data, dependency_matrix_data, dependWeight_matrix_data, sequencesLen_vector_data, discountSeq_matrix_data, discountForEachNode_matrix_data, masks_matrix_data, groups_tensor_data = dataProcessTools.prepareDataForTrainingBatch(
                trainingDataForBatch, trainingPairsForBatch, sequences_data,
                alpha, beta, gamma)
            if len(trainingParis_data) == 0:
                continue
            cost = f_grad_shared(
                trainingParis_data, sequences_matrix_data,
                dependency_matrix_data, dependWeight_matrix_data,
                sequencesLen_vector_data, discountSeq_matrix_data,
                discountForEachNode_matrix_data, wordsEmbeddings,
                typesEmbeddings, masks_matrix_data, groups_tensor_data)
            f_update(lrate)
            if numpy.isnan(cost) or numpy.isinf(cost):
                print('bad cost detected: ', cost)
                return
            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ',  Update =', uidx, ',  Cost =', cost
            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving... time ==', time.strftime(
                    '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print('Done')
    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                       time.localtime(end_time))
    print 'Training finished! Cost time == ', end_time - start_time, ' s'
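
All of the training routines on this page push their parameter updates through an adadelta(...) helper defined elsewhere in the repository, which returns a compiled f_grad_shared function (computes the cost and stores the gradients) and an f_update function (applies the step). For orientation only, here is a minimal NumPy sketch of the standard Adadelta rule (Zeiler, 2012) that such a helper typically implements; it is not the repository's Theano implementation.

import numpy

def adadelta_step(param, grad, acc_grad, acc_delta, rho=0.95, eps=1e-6):
    # One Adadelta update: keep running averages of squared gradients and of
    # squared steps, and scale the step by their ratio (Zeiler, 2012).
    # Sketch only -- the repo's adadelta() instead builds Theano shared
    # variables and returns the compiled f_grad_shared / f_update pair.
    acc_grad = rho * acc_grad + (1.0 - rho) * grad ** 2
    step = -(numpy.sqrt(acc_delta + eps) / numpy.sqrt(acc_grad + eps)) * grad
    acc_delta = rho * acc_delta + (1.0 - rho) * step ** 2
    return param + step, acc_grad, acc_delta
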
Example #2
def proxEmbedBySubgraphs(
    trainingDataFile=main_dir + 'train_classmate',
    wordsEmbeddings_data=None,
    wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
    subpaths_map=None,
    subpaths_file=main_dir + 'facebook/subpathsSaveFile',
    subgraphSaveFile='',
    maxlen_subpaths=1000,
    wordsSize=1000000,
    maxlen=100,
    batch_size=1,
    is_shuffle_for_batch=False,
    dispFreq=5,
    saveFreq=5,
    saveto=main_dir + 'facebook/path2vec-modelParams.npz',
    lrate=0.0001,
    word_dimension=22,
    dimension=64,
    discount_alpha=0.3,
    discount_beta=0.3,
    h_output_method='max-pooling',
    objective_function_method='hinge-loss',
    objective_function_param=0,
    max_epochs=10,
    decay=0.01,
):
    model_options = locals().copy()

    if wordsEmbeddings_data is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings_data, word_dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            exit(0)
    trainingData, trainingPairs_data = dataProcessTools.getTrainingData(
        trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData),
                                                      batch_size,
                                                      is_shuffle_for_batch)

    subgraphs = dataProcessTools.readAllSubgraphDependencyAndSequencesWithLengths(
        subgraphSaveFile)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'

    trainingPairs, sequences, masks, lengths, subgraph_lens, wordsEmbeddings, buffer_tensor, nodesLens, cost = proxEmbedBySubgraphModel.proxEmbedBySubgraphModel(
        model_options, tparams)

    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    this_time = time.time()
    print 'Start to compile and optimize, time ==', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(this_time))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingPairs,
                                       sequences, masks, lengths,
                                       subgraph_lens, wordsEmbeddings,
                                       buffer_tensor, nodesLens, cost)

    print 'Start training models ......'
    best_p = None
    history_cost = []

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                         time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs_data[i] for i in batch]
            tuples3DMatrix_data, x_data, mask_data, lens_data, subgraph_lens_data, buffer_tensor_data, nodesLens_data = dataProcessTools.generateSequenceAndMasksForSingleSequenceWithLength(
                trainingDataForBatch, trainingPairsForBatch, subgraphs,
                dimension)
            cost = f_grad_shared(tuples3DMatrix_data, x_data, mask_data,
                                 lens_data, subgraph_lens_data,
                                 wordsEmbeddings_data, buffer_tensor_data,
                                 nodesLens_data)
            f_update(lrate)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('bad cost detected: ', cost)
                return
            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ',  Update =', uidx, ',  Cost =', cost
                this_time = time.time()
                print 'Time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                               time.localtime(this_time))
            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print('Saving...')
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print('Done')
        gc.collect()

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                       time.localtime(end_time))
    print 'Training finished! Cost time == ', end_time - start_time, ' s'

Example #3
def compute_proxEmbedBySubgraph(
                     wordsEmbeddings=None, 
                     wordsEmbeddings_path=None, 
                     word_dimension=0, 
                     dimension=0,
                     wordsSize=0, 
                     subpaths_map=None, 
                     subpaths_file=None,
                     subgraphs_file='', 
                     maxlen_subpaths=1000, 
                     maxlen=100,  # Sequences longer than this are ignored
                     
                     test_data_file='', 
                     top_num=10, 
                     ideal_data_file='',
                     func=None, 
                   ):
    model_options = locals().copy()
    
    if wordsEmbeddings is None: 
        if wordsEmbeddings_path is not None: 
            wordsEmbeddings,word_dimension,wordsSize=dataProcessTools.getWordsEmbeddings(wordsEmbeddings_path)
        else: 
            exit(0) 

    subgraphs_map=dataProcessTools.readAllSubgraphDependencyAndSequencesWithLengths(subgraphs_file)
    
    line_count=0 
    test_map={} 
    print 'Compute MAP and nDCG for file ',test_data_file
    with open(test_data_file) as f: 
        for l in f: 
            arr=l.strip().split()
            query=int(arr[0]) 
            map={} 
            for i in range(1,len(arr)): 
                candidate=int(arr[i]) 
                sequences_data, mask_data, lens_data, subgraph_lens_data, buffer_tensor_data,nodesLens_data=dataProcessTools.prepareDataForTestForSubgraphSingleSequenceWithLengthsAsymmetric(query, candidate, subgraphs_map, dimension)
                if sequences_data is None and mask_data is None and lens_data is None: 
                    map[candidate]=-1000. 
                else: 
                    value=func(sequences_data, mask_data, lens_data, subgraph_lens_data, wordsEmbeddings, buffer_tensor_data, nodesLens_data) 
                    map[candidate]=value
            
            tops_in_line=toolsFunction.mapSortByValueDESC(map, top_num)
            test_map[line_count]=tops_in_line 
            line_count+=1 
                
    line_count=0 
    ideal_map={}
    with open(ideal_data_file) as f: 
        for l in f: 
            arr=l.strip().split()
            arr=[int(x) for x in arr] 
            ideal_map[line_count]=arr[1:] 
            line_count+=1 
    
    MAP=evaluateTools.get_MAP(top_num, ideal_map, test_map)
    MnDCG=evaluateTools.get_MnDCG(top_num, ideal_map, test_map)
    
    return MAP,MnDCG
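
A hypothetical call to the evaluator above. The compiled Theano scoring function passed as func comes from the repository's model-loading code (compare the proxEmbedProcessAndAssess.get_proxEmbedModel call in the batch experiment further down this page); compiled_scoring_func and all file paths below are placeholders.

# Hypothetical usage sketch; 'compiled_scoring_func' stands in for the Theano
# function rebuilt from saved model parameters, and the paths are placeholders.
MAP, MnDCG = compute_proxEmbedBySubgraph(
    wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
    dimension=64,
    subgraphs_file=main_dir + 'facebook/subgraphSaveFile',
    test_data_file=main_dir + 'facebook.splits/test/test_classmate_1',
    top_num=10,
    ideal_data_file=main_dir + 'facebook.splits/ideal/ideal_classmate_1',
    func=compiled_scoring_func,
)
print 'MAP ==', MAP, ',  MnDCG ==', MnDCG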
    
    
    
    
    
Example #4
def compute_metagraphAttention(
                     wordsEmbeddings=None, # words embeddings
                     wordsEmbeddings_path=None, # the file path of words embeddings
                     metagraphEmbeddings_path=None, # the file path of metagraph embeddings
                     wordsSize=0, # the size of words vocabulary
                     subpaths_map=None, # contains sub-paths
                     subpaths_file=None, # the file which contains sub-paths
                     maxlen_subpaths=1000, # the max length for sub-paths
                     
                     test_data_file='', # test data file
                     top_num=10, # top num in experiments
                     ideal_data_file='', # ideal data file
                     func=None, # the MPE process model
                   ):
    """
        evaluate the MPE model
    """
    model_options = locals().copy()
    
    if wordsEmbeddings is None: 
        if wordsEmbeddings_path is not None: 
            wordsEmbeddings,dimension,wordsSize=dataProcessTools.getWordsEmbeddings(wordsEmbeddings_path)
        else: 
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0) 

    if subpaths_map is None: 
        if subpaths_file is not None:
            subpaths_map=dataProcessTools.loadAllSubPathsRomove0Path(subpaths_file, maxlen_subpaths, wordsEmbeddings)
        else: 
            print 'There is no path for sub-paths, exit!!!'
            exit(0)
            
    metagraphEmbedding_data, metagraphDimension, metagraphSize=dataProcessTools.getMetagraphEmbeddings(metagraphEmbeddings_path)

    line_count=0 
    test_map={} 
    print 'Compute MAP and nDCG for file ',test_data_file
    with open(test_data_file) as f: 
        for l in f: 
            arr=l.strip().split()
            query=int(arr[0]) 
            map={} 
            for i in range(1,len(arr)): 
                candidate=int(arr[i]) 
                subPaths_matrix_data,subPaths_mask_data,subPaths_lens_data=dataProcessTools.prepareDataForTest(query, candidate, subpaths_map)
                if subPaths_matrix_data is None and subPaths_mask_data is None and subPaths_lens_data is None: 
                    map[candidate]=-1000. 
                else: 
                    value=func(metagraphEmbedding_data, subPaths_matrix_data, subPaths_mask_data, wordsEmbeddings)
                    map[candidate]=value
                del subPaths_matrix_data
                del subPaths_mask_data
                del subPaths_lens_data
            tops_in_line=toolsFunction.mapSortByValueDESC(map, top_num)
            test_map[line_count]=tops_in_line 
            line_count+=1 
            map=None
            gc.collect()
                
    
    line_count=0 
    ideal_map={}
    with open(ideal_data_file) as f: 
        for l in f: 
            arr=l.strip().split()
            arr=[int(x) for x in arr] 
            ideal_map[line_count]=arr[1:] 
            line_count+=1 
    
    MAP=evaluateTools.get_MAP(top_num, ideal_map, test_map)
    MnDCG=evaluateTools.get_MnDCG(top_num, ideal_map, test_map)
    
    return MAP,MnDCG
    
    
Example #5
def compute_proxEmbed(
        wordsEmbeddings=None,  # words embeddings
        wordsEmbeddings_path=None,  # the file path of words embeddings
        word_dimension=0,  #  dimension of words embeddings
        dimension=0,  # the dimension of paths embeddings
        wordsSize=0,  # the size of words vocabulary
        subpaths_map=None,  # contains sub-paths
        subpaths_file=None,  # the file which contains sub-paths
        maxlen_subpaths=1000,  # the max length for sub-paths
        maxlen=100,  # Sequences longer than this are ignored
        test_data_file='',  # the file path of test data
        top_num=10,  # the top num to predict
        ideal_data_file='',  # ground truth
        func=None,  # model function
):
    """
    compute the result of the model
    """

    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)

    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(
                subpaths_file, maxlen_subpaths)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)

    line_count = 0
    test_map = {}
    print 'Compute MAP and nDCG for file ', test_data_file
    with open(test_data_file) as f:
        for l in f:
            arr = l.strip().split()
            query = int(arr[0])
            map = {}
            for i in range(1, len(arr)):
                candidate = int(arr[i])
                subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = dataProcessTools.prepareDataForTest(
                    query, candidate, subpaths_map)
                if subPaths_matrix_data is None and subPaths_mask_data is None and subPaths_lens_data is None:
                    map[candidate] = -1000.
                else:
                    value = func(subPaths_matrix_data, subPaths_mask_data,
                                 subPaths_lens_data, wordsEmbeddings)
                    map[candidate] = value

            tops_in_line = toolsFunction.mapSortByValueDESC(map, top_num)
            test_map[line_count] = tops_in_line
            line_count += 1

    line_count = 0
    ideal_map = {}
    with open(ideal_data_file) as f:
        for l in f:
            arr = l.strip().split()
            arr = [int(x) for x in arr]
            ideal_map[line_count] = arr[1:]
            line_count += 1

    MAP = evaluateTools.get_MAP(top_num, ideal_map, test_map)
    MnDCG = evaluateTools.get_MnDCG(top_num, ideal_map, test_map)

    return MAP, MnDCG
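
The evaluators on this page delegate the final scoring to evaluateTools.get_MAP and evaluateTools.get_MnDCG, whose code is not shown here. Below is a minimal, self-contained sketch of the standard MAP@k and mean nDCG@k (binary relevance) definitions such helpers typically implement, assuming test_map maps each query line to its ranked top-k candidate ids and ideal_map maps it to the ground-truth ids; the repository's own conventions (tie handling, normalization) may differ.

import math

def map_at_k(k, ideal_map, test_map):
    # Mean average precision at k over all queries (sketch only).
    aps = []
    for q, ranked in test_map.items():
        relevant = set(ideal_map.get(q, []))
        hits, precisions = 0, []
        for rank, cand in enumerate(ranked[:k], start=1):
            if cand in relevant:
                hits += 1
                precisions.append(hits / float(rank))
        denom = min(len(relevant), k)
        aps.append(sum(precisions) / denom if denom else 0.0)
    return sum(aps) / len(aps) if aps else 0.0

def mean_ndcg_at_k(k, ideal_map, test_map):
    # Mean nDCG at k with binary relevance (sketch only).
    scores = []
    for q, ranked in test_map.items():
        relevant = set(ideal_map.get(q, []))
        dcg = sum(1.0 / math.log(rank + 1, 2)
                  for rank, cand in enumerate(ranked[:k], start=1)
                  if cand in relevant)
        ideal_hits = min(len(relevant), k)
        idcg = sum(1.0 / math.log(rank + 1, 2)
                   for rank in range(1, ideal_hits + 1))
        scores.append(dcg / idcg if idcg else 0.0)
    return sum(scores) / len(scores) if scores else 0.0
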

Example #6
def experiment_for_batch(num):
    cf = ConfigParser.SafeConfigParser()
    cf.read("pythonParamsConfig")
    suffix = "5"
    index = "1"
    class_name = cf.get("param", "class_name")  # the relation name of data
    trainingDataFile = os.path.join(main_dir + '/', dataset_name + '.splits', 'train.' + suffix,
                                    'train_' + class_name + '_' + '1')  # the full path of training data file. This path will be generated by main_dir, dataset_name, suffix, class_name and index.
    wordsEmbeddings = None  # words embeddings
    wordsEmbeddings_path = cf.get("param", "wordsEmbeddings_path")  # the file path of words embeddings (read added here: the original snippet uses this variable without defining it; the config key name is assumed)
    word_dimension = cf.getint("param", "word_dimension")  # dimension of words embeddings (read added here: the original snippet uses this variable without defining it; the config key name is assumed)
    dimension = cf.getint("param", "dimension")  # the dimension of paths embeddings
    wordsSize = cf.getint("param", "wordsSize")  # the size of words vocabulary
    subpaths_map = None  # contains sub-paths
    subpaths_file = cf.get("param", "subpaths_file")  # the file which contains sub-paths (uncommented: it is used below)
    maxlen_subpaths = cf.getint("param", "maxlen_subpaths")  # the max length for sub-paths
    h_output_method = cf.get("param",
                             "h_output_method")  # the output way of lstm. There are three ways, "h" only uses the last output h as the output of lstm for one path; "mean-pooling" uses the mean-pooling of all hi as the output of lstm for one path; "max-pooling" uses the max-pooling of all hi as the output of lstm for one path.
    maxlen = cf.getint("param", "maxlen")  # Sequences longer than this are ignored
    batch_size = cf.getint("param", "batch_size")  # use a batch for training. This is the size of this batch. (uncommented: it is used below)
    is_shuffle_for_batch = cf.getboolean("param", "is_shuffle_for_batch")  # if need shuffle for training
    discount_alpha = cf.getfloat("param",
                                 "discount_alpha")  # the parameter alpha for discount. The longer the subpath, the smaller its weight.
    subpaths_pooling_method = cf.get("param",
                                     "subpaths_pooling_method")  # the ways to combine several subpaths to one. "mean-pooling" means to combine all subpaths to one by mean-pooling; "max-pooling" means to combine all subpaths to one by max-pooling.
    objective_function_method = cf.get("param", "objective_function_method")  # loss function, we use sigmoid
    objective_function_param = cf.getfloat("param", "objective_function_param")  # the parameter in loss function, beta
    lrate = cf.getfloat("param", "lrate")  # learning rate
    max_epochs = cf.getint("param", "max_epochs")  # the max epochs for training (uncommented: it is used below)

    dispFreq = cf.getint("param", "dispFreq")  # the frequences for display
    saveFreq = cf.getint("param", "saveFreq")  # the frequences for saving the parameters
    saveto = os.path.join(main_dir + '/', dataset_name + '.trainModels', 'train.' + suffix,
                          'train_' + class_name + '_' + index + '.npz')  # the path for saving parameters. It is generated by main_dir, dataset_name, suffix, class_name and index.

    # the normalization of this model, l2-norm of all parameters
    decay_lstm_W = cf.getfloat("param", "decay_lstm_W")
    decay_lstm_U = cf.getfloat("param", "decay_lstm_U")
    decay_lstm_b = cf.getfloat("param", "decay_lstm_b")
    decay_w = cf.getfloat("param", "decay_w")

    test_data_file = os.path.join(main_dir + '/', dataset_name + '.splits', 'test',
                                  'test_' + class_name + '_' + index)  # the file of test data
    top_num = cf.getint("param", "top_num")  # the top num to predict
    ideal_data_file = os.path.join(main_dir + '/', dataset_name + '.splits', 'ideal',
                                   'ideal_' + class_name + '_')  # the file of ground truth
    print("trainingDataFile:", trainingDataFile)
    print("wordsEmbeddings_path:", wordsEmbeddings_path)
    test_file = []
    NDCG10 = []
    NDCG20 = []

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(wordsEmbeddings_path)
            # print("wordsEmbeddings:", wordsEmbeddings.shape, dimension, wordsSize)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)

    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(subpaths_file, maxlen_subpaths)
            # print("subpaths_map:", len(subpaths_map))
            # print(subpaths_map)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)

    # First, train the model
    cost_time = proxEmbed2.proxEmbedTraining(
        trainingDataFile,
        wordsEmbeddings,
        wordsEmbeddings_path,
        word_dimension,
        dimension,
        wordsSize,
        subpaths_map,
        subpaths_file,
        maxlen_subpaths,
        h_output_method,
        maxlen,
        batch_size,
        is_shuffle_for_batch,
        discount_alpha,
        subpaths_pooling_method,
        objective_function_method,
        objective_function_param,
        lrate,
        max_epochs,

        dispFreq,
        saveFreq,
        saveto,

        decay_lstm_W,
        decay_lstm_U,
        decay_lstm_b,
        decay_w,

        num,
        dataset_name,
        class_name,
        main_dir
    )

    # load the function which is trained beforehand
    for num_of_group in range(num):
        num_of_group += 1
        suffix = str(num_of_group)
        index = str(num_of_group)
        saveto = os.path.join(main_dir + '/', dataset_name + '.trainModels', 'train.' + suffix,
                              'train_' + class_name + '_' + index + '.npz')  # the path for saving parameters. It is generated by main_dir, dataset_name, suffix, class_name and index.

        computeFunc = proxEmbedProcessAndAssess.get_proxEmbedModel(
            saveto,
            word_dimension,
            dimension,
            h_output_method,
            discount_alpha,
            subpaths_pooling_method,
        )
        # test the model; compute_proxEmbed returns (MnDCG10, MnDCG20)
        test_data_file = os.path.join(main_dir + '/', dataset_name + '.splits', 'test',
                                      'test_' + class_name + '_' + index)  # the file of test data
        ideal_data_file = os.path.join(main_dir + '/', dataset_name + '.splits', 'ideal',
                                       'ideal_' + class_name + '_' + index)  # the file of ground truth
        test_file.append(test_data_file)
        MnDCG10, MnDCG20 = proxEmbedProcessAndAssess.compute_proxEmbed(
            wordsEmbeddings,
            wordsEmbeddings_path,
            word_dimension,
            dimension,
            wordsSize,
            subpaths_map,
            subpaths_file,
            maxlen_subpaths,
            maxlen,

            test_data_file,
            top_num,
            ideal_data_file,
            func=computeFunc,
        )

        print 'MnDCG10==', MnDCG10
        print 'MnDCG20==', MnDCG20
        NDCG10.append(MnDCG10)
        NDCG20.append(MnDCG20)

    return list(zip(test_file, cost_time, NDCG10, NDCG20))
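
A hypothetical driver for the batch experiment above. It assumes the pythonParamsConfig file, the module-level main_dir/dataset_name globals that experiment_for_batch reads, and the facebook-style directory layout built in the path expressions are all in place.

if __name__ == '__main__':
    # Hypothetical driver: train and evaluate over 10 splits, then print one
    # (test_file, training_seconds, nDCG@10, nDCG@20) row per split.
    results = experiment_for_batch(10)
    for test_file_path, seconds, ndcg10, ndcg20 in results:
        print test_file_path, seconds, ndcg10, ndcg20
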
Example #7
def compute_path2vec(
    wordsEmbeddings=None,
    wordsEmbeddings_path='None',
    typesEmbeddings=None,
    typesEmbeddings_path='None',
    word_dimension=0,
    type_dimension=0,
    dimension=0,
    attention_dimension=0,
    wordsSize=0,
    subpaths_map=None,
    subpaths_file='',
    sequences_map=None,
    sequences_file='',
    maxlen_subpaths=1000,
    maxlen=100,  # Sequences longer than this are ignored
    alpha=0,
    beta=0,
    gamma=0,
    test_data_file='',
    top_num=10,
    ideal_data_file='',
    func=None,
):
    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'Exit...'
            exit(0)
    if typesEmbeddings is None:
        if typesEmbeddings_path is not None:
            typesEmbeddings, type_dimension, wordsSize = dataProcessTools.getTypesEmbeddings(
                typesEmbeddings_path)
        else:
            print 'Exit...'
            exit(0)

    sequences_data = dataProcessTools.readAllSequencesFromFile(sequences_file)

    errCount = 0

    line_count = 0
    test_map = {}
    print 'Compute MAP and nDCG for file ', test_data_file
    with open(test_data_file) as f:
        for l in f:
            arr = l.strip().split()
            query = int(arr[0])
            map = {}
            candidates = []
            for i in range(1, len(arr)):
                key1 = arr[0] + '-' + arr[i]
                key2 = arr[i] + '-' + arr[0]
                if key1 in sequences_data or key2 in sequences_data:
                    candidates.append(int(arr[i]))
                else:
                    map[int(arr[i])] = -1000.
                    errCount += 1
            sequences_matrix, dependency_matrix, dependWeight_matrix, sequencesLen_vector, discountSeq_matrix, discountForEachNode_matrix, masks_matrix, group_tensor = dataProcessTools.prepareDataForTestBatch(
                query, candidates, sequences_data, alpha, beta, gamma)
            if len(sequences_matrix) > 0:
                scores = func(sequences_matrix, dependency_matrix,
                              dependWeight_matrix, sequencesLen_vector,
                              discountSeq_matrix, discountForEachNode_matrix,
                              wordsEmbeddings, typesEmbeddings, masks_matrix,
                              group_tensor)
                for index in range(len(candidates)):
                    map[candidates[index]] = scores[index]
            else:
                for i in range(1, len(arr)):
                    map[int(arr[i])] = -1.

            tops_in_line = toolsFunction.mapSortByValueDESC(map, top_num)
            test_map[line_count] = tops_in_line
            line_count += 1
            if line_count % 500 == 0:
                print '+',
                if line_count % 5000 == 0:
                    print ' time ==', time.strftime(
                        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

    line_count = 0
    ideal_map = {}
    with open(ideal_data_file) as f:
        for l in f:
            arr = l.strip().split()
            arr = [int(x) for x in arr]
            ideal_map[line_count] = arr[1:]
            line_count += 1

    MAP = evaluateTools.get_MAP(top_num, ideal_map, test_map)
    MnDCG = evaluateTools.get_MnDCG(top_num, ideal_map, test_map)

    print 'errCount =', errCount
    return MAP, MnDCG
Example #8
def metagraphAttentionTraining(
    trainingDataFile=main_dir +
    'facebook.splits/train.10/train_classmate_1',  # the full path of training data file
    metagraphEmbeddings_path='',  # the file path of metagraph embeddings
    wordsEmbeddings_data=None,  # words embeddings
    wordsEmbeddings_path=main_dir +
    'facebook/nodesFeatures',  # the file path of words embeddings
    wordsSize=1000000,  # the size of words vocabulary
    subpaths_map=None,  # contains sub-paths
    subpaths_file=main_dir +
    'facebook/subpathsSaveFile',  # the file which contains sub-paths
    maxlen_subpaths=1000,  # the max length for sub-paths
    maxlen=100,  # Sequences longer than this are ignored
    batch_size=10,  # use a batch for training. This is the size of this batch.
    is_shuffle_for_batch=True,  # if need shuffle for training
    objective_function_method='sigmoid',  # loss function, we use sigmoid here
    objective_function_param=0,  # the parameter in loss function, beta
    lrate=0.0001,  # learning rate
    max_epochs=100,  # the max epochs for training
    dispFreq=5,  # the frequences for display
    saveFreq=5,  # the frequences for saving the parameters
    saveto=main_dir +
    'facebook/path2vec-modelParams.npz',  # the path for saving parameters. It is generated by main_dir, dataset_name, suffix, class_name and index.

    # all dimensions parameters
    metagraph_embedding_dimension=10,  # metagraph embedding dimension 
    dimension_A=10,  # the dimension of attention when computing the m-node embedding
    dimension_lstm=10,  # dimension of lstm parameters
    dimension_B=10,  # the dimension of attention when computing the m-path embedding
    dimension_C=10,  # the dimension of attention when computing the m-paths embedding

    # decay parameters
    decay_Q_A=0.001,
    decay_b_A=0.001,
    decay_eta_A=0.001,
    decay_lstm_W=0.001,
    decay_lstm_U=0.001,
    decay_lstm_b=0.001,
    decay_Q_B=0.001,
    decay_b_B=0.001,
    decay_eta_B=0.001,
    decay_Q_C=0.001,
    decay_b_C=0.001,
    decay_eta_C=0.001,
    decay_w=0.001,
):
    # get all parameters
    model_options = locals().copy()

    if wordsEmbeddings_data is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings_data, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)

    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPathsRomove0Path(
                subpaths_file, maxlen_subpaths, wordsEmbeddings_data)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)

    metagraphEmbedding_data, metagraphDimension, metagraphSize = dataProcessTools.getMetagraphEmbeddings(
        metagraphEmbeddings_path)

    trainingData, trainingPairs_data = dataProcessTools.getTrainingData(
        trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData),
                                                      batch_size,
                                                      is_shuffle_for_batch)
    '''
        init shared variables
    '''
    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'

    metagraphEmbeddings, trainingParis, subPaths_matrix, subPaths_mask, wordsEmbeddings, cost = subgraphAttentionModelLSTMBatch.metagraphAttentionModel(
        model_options, tparams)

    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    this_time = time.time()
    print 'Start to compile and optimize, time ==', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(this_time))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, metagraphEmbeddings,
                                       trainingParis, subPaths_matrix,
                                       subPaths_mask, wordsEmbeddings, cost)

    print 'Start training models ......'
    best_p = None
    history_cost = []  # not used

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                         time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            # prepare data for this model
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs_data[i] for i in batch]
            triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = dataProcessTools.prepareDataForTraining(
                trainingDataForBatch, trainingPairsForBatch, subpaths_map)
            cost = 0
            cost = f_grad_shared(metagraphEmbedding_data, triples_matrix_data,
                                 subPaths_matrix_data, subPaths_mask_data,
                                 wordsEmbeddings_data)
            f_update(lrate)

            trainingDataForBatch = None
            trainingPairsForBatch = None
            del triples_matrix_data
            del subPaths_matrix_data
            del subPaths_mask_data
            del subPaths_lens_data

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('bad cost detected: ', cost)
                return
            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ',  Update =', uidx, ',  Cost =', cost
                this_time = time.time()
                print 'Time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                               time.localtime(this_time))
            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print('Saving...')
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)

                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print('Done')
        gc.collect()

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                       time.localtime(end_time))
    print 'Training finished! Cost time == ', end_time - start_time, ' s'
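
A hypothetical invocation of the metagraph-attention training routine above. The metagraph-embedding file name is an assumption; the remaining paths simply repeat the defaults from the signature.

if __name__ == '__main__':
    # Hypothetical call; 'facebook/metagraphEmbeddings' is an assumed file name,
    # the other paths are the signature defaults.
    metagraphAttentionTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',
        metagraphEmbeddings_path=main_dir + 'facebook/metagraphEmbeddings',
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        batch_size=10,
        max_epochs=100,
        saveto=main_dir + 'facebook/path2vec-modelParams.npz',
    )
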
Example #9
def compute_path2vec(
                     wordsEmbeddings=None, 
                     wordsEmbeddings_path='None', 
                     typesEmbeddings=None,
                     typesEmbeddings_path='None',
                     word_dimension=0,
                     type_dimension=0,
                     dimension=0,
                     attention_dimension=0,
                     wordsSize=0,
                     subpaths_map=None, 
                     subpaths_file='',
                     sequences_map=None, 
                     sequences_file='',
                     maxlen_subpaths=1000, 
                     maxlen=100,  
                     alpha=0,
                     beta=0,
                     gamma=0,
                     
                     test_data_file='', 
                     top_num=10,
                     ideal_data_file='', 
                     func=None, 
                   ):
    model_options = locals().copy()
    
    
    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None: 
            wordsEmbeddings,dimension,wordsSize=dataProcessTools.getWordsEmbeddings(wordsEmbeddings_path)
        else: 
            exit(0) 
    if typesEmbeddings is None: 
        if typesEmbeddings_path is not None: 
            typesEmbeddings,type_dimension,wordsSize=dataProcessTools.getTypesEmbeddings(typesEmbeddings_path)
        else: 
            exit(0) 
            
    sequences_data=dataProcessTools.readAllSequencesFromFile(sequences_file)

    errCount=0 

    line_count=0 
    test_map={} 
    print 'Compute MAP and nDCG for file ',test_data_file
    with open(test_data_file) as f:
        for l in f: 
            arr=l.strip().split()
            query=int(arr[0]) 
            map={} 
            for i in range(1,len(arr)): 
                candidate=int(arr[i]) 
                sequences_matrix, dependency_matrix, dependWeight_matrix, sequencesLen_vector, discountSeq_matrix, discountForEachNode_matrix=dataProcessTools.prepareDataForTest(query, candidate, sequences_data, alpha, beta, gamma)
                if sequences_matrix is None or len(sequences_matrix)==0: 
                    map[candidate]=-1000. 
                    errCount+=1
                else: 
                    value=func(sequences_matrix, dependency_matrix, dependWeight_matrix, sequencesLen_vector, discountSeq_matrix, discountForEachNode_matrix,wordsEmbeddings,typesEmbeddings)
                    map[candidate]=value
            
            tops_in_line=toolsFunction.mapSortByValueDESC(map, top_num)
            test_map[line_count]=tops_in_line 
            line_count+=1 
                
    
    line_count=0
    ideal_map={}
    with open(ideal_data_file) as f: 
        for l in f: 
            arr=l.strip().split()
            arr=[int(x) for x in arr] 
            ideal_map[line_count]=arr[1:] 
            line_count+=1 
    
    MAP=evaluateTools.get_MAP(top_num, ideal_map, test_map)
    MnDCG=evaluateTools.get_MnDCG(top_num, ideal_map, test_map)
    
    print 'errCount =',errCount
    return MAP,MnDCG
    
    
    
    
    
Example #10
def compute_proxEmbed(
        wordsEmbeddings=None,  # words embeddings
        wordsEmbeddings_path=None,  # the file path of words embeddings
        word_dimension=0,  # dimension of words embeddings
        dimension=0,  # the dimension of paths embeddings
        wordsSize=0,  # the size of words vocabulary
        subpaths_map=None,  # contains sub-paths
        subpaths_file=None,  # the file which contains sub-paths
        maxlen_subpaths=1000,  # the max length for sub-paths
        maxlen=100,  # Sequences longer than this are ignored

        test_data_file='',  # the file path of test data
        top_num=10,  # the top num to predict
        ideal_data_file='',  # ground truth
        func=None,  # model function
):
    """
    compute the result of the model
    """

    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)

    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(subpaths_file, maxlen_subpaths)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)

    line_count = 0
    test_map = {}
    print 'Compute MAP and nDCG for file ', test_data_file
    with open(test_data_file) as f:
        for l in f:
            arr = l.strip().split()
            query = int(arr[0])
            map = {}
            count_none = 0
            count_have = 0
            for i in range(1, len(arr)):
                candidate = int(arr[i])
                subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = dataProcessTools.prepareDataForTestAsymmetric(
                    query, candidate, subpaths_map)
                if subPaths_matrix_data is None and subPaths_mask_data is None and subPaths_lens_data is None:
                    map[candidate] = -1000.
                    # print(candidate, "subPaths_data is None")
                    count_none += 1
                else:
                    count_have += 1
                    value = func(subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data, wordsEmbeddings)
                    map[candidate] = value
            print("count_none", count_none, "count_have", count_have)
            tops_in_line = toolsFunction.mapSortByValueDESC(map, top_num)
            test_map[line_count] = tops_in_line
            line_count += 1
            # print("map:", map)

    line_count = 0
    ideal_map = {}
    with open(ideal_data_file) as f:
        for l in f:
            arr = l.strip().split()
            arr = [int(x) for x in arr]
            ideal_map[line_count] = arr[1:]
            line_count += 1

            # MAP=evaluateTools.get_MAP(top_num, ideal_map, test_map)
    MnDCG10 = evaluateTools.get_MnDCG(10, ideal_map, test_map)
    MnDCG20 = evaluateTools.get_MnDCG(20, ideal_map, test_map)
    # print("top_num:", top_num)
    # print("ideal_map:", ideal_map)
    # print("test_map:", test_map)

    return MnDCG10, MnDCG20
Example #11
def proxEmbedTraining(
    trainingDataFile=main_dir +
    'facebook.splits/train.10/train_classmate_1',  # the full path of training data file
    wordsEmbeddings=None,  # words embeddings
    wordsEmbeddings_path=main_dir +
    'facebook/nodesFeatures',  # the file path of words embeddings
    word_dimension=22,  # dimension of words embeddings
    dimension=64,  # the dimension of paths embeddings
    wordsSize=1000000,  # the size of words vocabulary
    subpaths_map=None,  # contains sub-paths
    subpaths_file=main_dir +
    'facebook/subpathsSaveFile',  # the file which contains sub-paths
    maxlen_subpaths=1000,  # the max length for sub-paths
    h_output_method='mean-pooling',  # the output way of lstm. There are three ways, "h" only uses the last output h as the output of lstm for one path; "mean-pooling" uses the mean-pooling of all hi as the output of lstm for one path; "max-pooling" uses the max-pooling of all hi as the output of lstm for one path.
    maxlen=100,  # Sequences longer than this are ignored
    batch_size=1,  # use a batch for training. This is the size of this batch.
    is_shuffle_for_batch=False,  # if need shuffle for training
    discount_alpha=0.1,  # the parameter alpha for discount. The longer the subpath, the smaller its weight.
    subpaths_pooling_method='max-pooling',  # the ways to combine several subpaths to one. "mean-pooling" means to combine all subpaths to one by mean-pooling; "max-pooling" means to combine all subpaths to one by max-pooling.
    objective_function_method='hinge-loss',  # loss function ('hinge-loss' here)
    objective_function_param=0,  # the parameter in loss function, beta
    lrate=0.0001,  # learning rate
    max_epochs=10,  # the max epochs for training
    dispFreq=5,  # the frequences for display
    saveFreq=5,  # the frequences for saving the parameters
    saveto=main_dir +
    'facebook/proxEmbed-modelParams.npz',  # the path for saving parameters. It is generated by main_dir, dataset_name, suffix, class_name and index.

    # the normalization of this model, l2-norm of all parameters
    decay_lstm_W=0.01,
    decay_lstm_U=0.01,
    decay_lstm_b=0.01,
    decay_w=0.01,
):
    """
    The training stage of ProxEmbed
    """
    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)

    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(
                subpaths_file, maxlen_subpaths)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)

    trainingData, trainingPairs = dataProcessTools.getTrainingData(
        trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData),
                                                      batch_size,
                                                      is_shuffle_for_batch)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'

    trainingParis, subPaths_matrix, subPaths_mask, subPaths_lens, wemb, cost = proxEmbedModelMulti.proxEmbedModel(
        model_options, tparams)

    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingParis,
                                       subPaths_matrix, subPaths_mask,
                                       subPaths_lens, wemb, cost)

    print 'Start training models ......'
    best_p = None
    history_cost = []

    models_count = [0, 0, 0, 0]

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                         time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs[i] for i in batch]
            triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = dataProcessTools.prepareDataForTraining(
                trainingDataForBatch, trainingPairsForBatch, subpaths_map)
            cost = 0
            cost = f_grad_shared(triples_matrix_data, subPaths_matrix_data,
                                 subPaths_mask_data, subPaths_lens_data,
                                 wordsEmbeddings)
            f_update(lrate)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('bad cost detected: ', cost)
                return
            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ',  Update =', uidx, ',  Cost =', cost
                print 'models_count ==', models_count
            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print('Saving...')
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print('Done')
    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                       time.localtime(end_time))
    print 'Training finished! Cost time == ', end_time - start_time, ' s'
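
A hypothetical invocation of the ProxEmbed training routine above. All paths repeat the signature defaults; the pooling and batch settings are illustrative choices, not values mandated by the example.

if __name__ == '__main__':
    # Hypothetical call; paths are the signature defaults, the rest are
    # illustrative settings.
    proxEmbedTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        h_output_method='max-pooling',
        subpaths_pooling_method='max-pooling',
        batch_size=16,
        max_epochs=10,
        saveto=main_dir + 'facebook/proxEmbed-modelParams.npz',
    )
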
Example #12
def proxEmbedTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',  # the full path of training data file
        wordsEmbeddings=None,  # words embeddings
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',  # the file path of words embeddings
        word_dimension=22,  # dimension of words embeddings
        dimension=64,  # the dimension of paths embeddings
        wordsSize=1000000,  # the size of words vocabulary
        subpaths_map=None,  # contains sub-paths
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',  # the file which contains sub-paths
        maxlen_subpaths=1000,  # the max length for sub-paths
        h_output_method='mean-pooling',
        # the output way of lstm. There are three ways, "h" only uses the last output h as the output of lstm for one path; "mean-pooling" uses the mean-pooling of all hi as the output of lstm for one path; "max-pooling" uses the max-pooling of all hi as the output of lstm for one path.
        maxlen=100,  # Sequences longer than this are ignored
        batch_size=1,  # use a batch for training. This is the size of this batch.
        is_shuffle_for_batch=False,  # if need shuffle for training
        discount_alpha=0.1,  # the parameter alpha for discount. The longer the subpath, the smaller its weight.
        subpaths_pooling_method='max-pooling',
        # the ways to combine several subpaths to one. "mean-pooling" means to combine all subpaths to one by mean-pooling; "max-pooling" means to combine all subpaths to one by max-pooling.
        objective_function_method='hinge-loss',  # loss function ('hinge-loss' here)
        objective_function_param=0,  # the parameter in loss function, beta
        lrate=0.0001,  # learning rate
        max_epochs=10,  # the max epochs for training

        dispFreq=5,  # the frequences for display
        saveFreq=5,  # the frequences for saving the parameters
        saveto=main_dir + 'facebook/proxEmbed-modelParams.npz',
        # the path for saving parameters. It is generated by main_dir, dataset_name, suffix, class_name and index.

        # the normalization of this model, l2-norm of all parameters
        decay_lstm_W=0.01,
        decay_lstm_U=0.01,
        decay_lstm_b=0.01,
        decay_w=0.01,

        num_group=0,
        dataset_name="",
        class_name="",
        main_dir="",
):
    """
    The training stage of ProxEmbed
    """
    model_options = locals().copy()
    model_options.pop('wordsEmbeddings')
    print(model_options)

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(wordsEmbeddings_path)
            # print("wordsEmbeddings:", wordsEmbeddings.shape, dimension, wordsSize)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)

    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(subpaths_file, maxlen_subpaths)
            # print("subpaths_map:", len(subpaths_map))
            # print(subpaths_map)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)

    cost_time = []

    for num_of_group in range(num_group):
        num_of_group += 1
        suffix = str(num_of_group)
        index = str(num_of_group)
        trainingDataFile = os.path.join(main_dir + '/', dataset_name + '.splits', "train." + suffix, 'train_' + class_name + '_' + '1')
        saveto = os.path.join(main_dir + '/', dataset_name + '.trainModels', 'train.' + suffix,
                              'train_' + class_name + '_' + index + '.npz')
        trainingData, trainingPairs = dataProcessTools.getTrainingData(trainingDataFile)
        allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size, is_shuffle_for_batch)

        params = init_sharedVariables(model_options)
        tparams = init_tparams(params)
        print 'Generate models ......'

        trainingParis, subPaths_matrix, subPaths_mask, subPaths_lens, wemb, cost = proxEmbedModelMulti.proxEmbedModel(
            model_options, tparams)
        print("trainingParis:", type(trainingParis), trainingParis.shape)

        print 'Generate gradients ......'
        grads = tensor.grad(cost, wrt=list(tparams.values()))
        print 'Using Adadelta to generate functions ......'
        lr = tensor.scalar(name='lr')
        f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingParis, subPaths_matrix, subPaths_mask,
                                           subPaths_lens,
                                           wemb, cost)

        print 'Start training models ......'
        best_p = None
        history_cost = []

        models_count = [0, 0, 0, 0]

        start_time = time.time()
        print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
        uidx = 0
        for eidx in range(max_epochs):
            for _, batch in allBatches:
                uidx += 1
                trainingDataForBatch = [trainingData[i] for i in batch]
                trainingPairsForBatch = [trainingPairs[i] for i in batch]
                triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = dataProcessTools.prepareDataForTraining(
                    trainingDataForBatch, trainingPairsForBatch, subpaths_map)
                cost = 0
                cost = f_grad_shared(triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data,
                                     wordsEmbeddings)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print('bad cost detected: ', cost)
                    return
                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch =', eidx, ',  Update =', uidx, ',  Cost =', cost
                    print 'models_count ==', models_count
                if saveto and numpy.mod(uidx, saveFreq) == 0:
                    print('Saving...')
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_cost, **params)
                    pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                    print('Done')
        end_time = time.time()
        print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
        print 'Training finished! Cost time == ', end_time - start_time, ' s'
        cost_time.append(end_time - start_time)
    return cost_time
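
This variant trains one model per data split and returns the per-split training times. A hypothetical call, assuming main_dir points at a directory that contains the facebook.splits/ and facebook.trainModels/ layout the path-building code above expects:

if __name__ == '__main__':
    # Hypothetical call; dataset_name / class_name / main_dir must match the
    # on-disk layout that trainingDataFile and saveto are assembled from above.
    times = proxEmbedTraining(
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        num_group=10,
        dataset_name='facebook',
        class_name='classmate',
        main_dir=main_dir,
    )
    print 'training seconds per split ==', times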