def interactiveGRUTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',
        wordsEmbeddings=None,
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        typesEmbeddings=None,
        typesEmbeddings_path='',
        word_dimension=22,
        type_dimension=20,
        dimension=64,
        attention_dimension=12,
        wordsSize=1000000,
        subpaths_map=None,
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        sequences_map=None,
        sequences_file='',
        maxlen_subpaths=1000,
        maxlen=100,  # sequences longer than this are ignored
        batch_size=1,
        is_shuffle_for_batch=False,
        alpha=0.1,
        beta=0.1,
        gamma=0.1,
        objective_function_method='hinge-loss',
        objective_function_param=0,
        lrate=0.0001,
        max_epochs=10,
        dispFreq=5,
        saveFreq=5,
        saveto=main_dir + 'facebook/path2vec-modelParams.npz',
        decay=0.01,
):
    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)
    if typesEmbeddings is None:
        if typesEmbeddings_path is not None:
            typesEmbeddings, type_dimension, wordsSize = dataProcessTools.getTypesEmbeddings(
                typesEmbeddings_path)
        else:
            print 'There is no path for typesEmbeddings, exit!!!'
            exit(0)

    trainingData, trainingPairsData = dataProcessTools.getTrainingData(trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                      is_shuffle_for_batch)
    sequences_data = dataProcessTools.readAllSequencesFromFile(sequences_file)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    trainingParis, sequences_matrix, dependency_matrix, dependWeight_matrix, \
        sequencesLen_vector, discountSeq_matrix, discountForEachNode_matrix, \
        wordsEmbs, typesEmbs, masks_matrix, groups_tensor, cost = \
        interactiveGRULearningBatch.interactiveGRULearning(model_options, tparams)
    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(
        lr, tparams, grads, trainingParis, sequences_matrix, dependency_matrix,
        dependWeight_matrix, sequencesLen_vector, discountSeq_matrix,
        discountForEachNode_matrix, wordsEmbs, typesEmbs, masks_matrix,
        groups_tensor, cost)
    print 'Start training models ......'

    best_p = None
    history_cost = []

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairsData[i] for i in batch]
            trainingParis_data, sequences_matrix_data, dependency_matrix_data, \
                dependWeight_matrix_data, sequencesLen_vector_data, \
                discountSeq_matrix_data, discountForEachNode_matrix_data, \
                masks_matrix_data, groups_tensor_data = \
                dataProcessTools.prepareDataForTrainingBatch(
                    trainingDataForBatch, trainingPairsForBatch, sequences_data,
                    alpha, beta, gamma)
            if len(trainingParis_data) == 0:
                continue
            cost = f_grad_shared(
                trainingParis_data, sequences_matrix_data, dependency_matrix_data,
                dependWeight_matrix_data, sequencesLen_vector_data,
                discountSeq_matrix_data, discountForEachNode_matrix_data,
                wordsEmbeddings, typesEmbeddings, masks_matrix_data,
                groups_tensor_data)
            f_update(lrate)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'bad cost detected: ', cost
                return

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost

            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving... time ==', time.strftime('%Y-%m-%d %H:%M:%S',
                                                         time.localtime(time.time()))
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print 'Done'

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print 'Training finished! Cost time ==', end_time - start_time, 's'
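
# Illustrative sketch (not part of the original pipeline): how a checkpoint written by the
# save block above (numpy.savez for the parameters plus pickle.dump for model_options)
# could be read back. The helper name loadSavedModelParams and its return format are
# assumptions; only the module-level numpy and pickle imports already used above are relied on.
def loadSavedModelParams(saveto):
    """Reload the parameters and model_options saved during training."""
    saved = numpy.load(saveto)
    # everything except the history array is a model parameter
    params = dict((k, saved[k]) for k in saved.files if k != 'history_errs')
    with open('%s.pkl' % saveto, 'rb') as f:
        model_options = pickle.load(f)
    return params, model_options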
def proxEmbedBySubgraphs(
        trainingDataFile=main_dir + 'train_classmate',
        wordsEmbeddings_data=None,
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        subpaths_map=None,
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        subgraphSaveFile='',
        maxlen_subpaths=1000,
        wordsSize=1000000,
        maxlen=100,
        batch_size=1,
        is_shuffle_for_batch=False,
        dispFreq=5,
        saveFreq=5,
        saveto=main_dir + 'facebook/path2vec-modelParams.npz',
        lrate=0.0001,
        word_dimension=22,
        dimension=64,
        discount_alpha=0.3,
        discount_beta=0.3,
        h_output_method='max-pooling',
        objective_function_method='hinge-loss',
        objective_function_param=0,
        max_epochs=10,
        decay=0.01,
):
    model_options = locals().copy()

    if wordsEmbeddings_data is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings_data, word_dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            exit(0)
    trainingData, trainingPairs_data = dataProcessTools.getTrainingData(trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                      is_shuffle_for_batch)
    subgraphs = dataProcessTools.readAllSubgraphDependencyAndSequencesWithLengths(
        subgraphSaveFile)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    trainingPairs, sequences, masks, lengths, subgraph_lens, wordsEmbeddings, \
        buffer_tensor, nodesLens, cost = \
        proxEmbedBySubgraphModel.proxEmbedBySubgraphModel(model_options, tparams)
    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    this_time = time.time()
    print 'Start to compile and optimize, time ==', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(this_time))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingPairs, sequences,
                                       masks, lengths, subgraph_lens, wordsEmbeddings,
                                       buffer_tensor, nodesLens, cost)
    print 'Start training models ......'

    best_p = None
    history_cost = []

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs_data[i] for i in batch]
            tuples3DMatrix_data, x_data, mask_data, lens_data, subgraph_lens_data, \
                buffer_tensor_data, nodesLens_data = \
                dataProcessTools.generateSequenceAndMasksForSingleSequenceWithLength(
                    trainingDataForBatch, trainingPairsForBatch, subgraphs, dimension)
            cost = f_grad_shared(tuples3DMatrix_data, x_data, mask_data, lens_data,
                                 subgraph_lens_data, wordsEmbeddings_data,
                                 buffer_tensor_data, nodesLens_data)
            f_update(lrate)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'bad cost detected: ', cost
                return

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost
                this_time = time.time()
                print 'Time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(this_time))

            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...'
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print 'Done'
            gc.collect()

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print 'Training finished! Cost time ==', end_time - start_time, 's'
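
# A minimal sketch, for reference only, of a mini-batch index generator that is consistent
# with how allBatches is consumed above (`for _, batch in allBatches`). The real helper is
# dataProcessTools.get_minibatches_idx and may differ in details; this sketch is an assumption.
def _sketch_get_minibatches_idx(n, minibatch_size, shuffle=False):
    idx_list = numpy.arange(n, dtype='int64')
    if shuffle:
        numpy.random.shuffle(idx_list)
    minibatches = []
    start = 0
    while start < n:
        minibatches.append(idx_list[start:start + minibatch_size])
        start += minibatch_size
    # pairs of (batch index, list of sample indices)
    return zip(range(len(minibatches)), minibatches)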
def proxEmbedTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',  # full path of the training data file
        wordsEmbeddings=None,  # words embeddings
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',  # file path of the words embeddings
        word_dimension=22,  # dimension of the words embeddings
        dimension=64,  # dimension of the path embeddings
        wordsSize=1000000,  # size of the words vocabulary
        subpaths_map=None,  # contains the sub-paths
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',  # file which contains the sub-paths
        maxlen_subpaths=1000,  # max length of a sub-path
        h_output_method='mean-pooling',  # LSTM output for one path: "h" uses only the last hidden state, "mean-pooling" averages all h_i, "max-pooling" takes the element-wise max of all h_i
        maxlen=100,  # sequences longer than this are ignored
        batch_size=1,  # size of a training mini-batch
        is_shuffle_for_batch=False,  # whether to shuffle the training data when building batches
        discount_alpha=0.1,  # discount parameter alpha: the longer the sub-path, the smaller its weight
        subpaths_pooling_method='max-pooling',  # how to combine several sub-paths into one vector: "mean-pooling" or "max-pooling"
        objective_function_method='hinge-loss',  # loss function; hinge-loss is used here
        objective_function_param=0,  # parameter beta of the loss function
        lrate=0.0001,  # learning rate
        max_epochs=10,  # max number of training epochs
        dispFreq=5,  # display frequency
        saveFreq=5,  # frequency for saving the parameters
        saveto=main_dir + 'facebook/proxEmbed-modelParams.npz',  # path for saving the parameters; generated from main_dir, dataset_name, suffix, class_name and index
        # L2 regularization (weight decay) of the model parameters
        decay_lstm_W=0.01,
        decay_lstm_U=0.01,
        decay_lstm_b=0.01,
        decay_w=0.01,
):
    """
    The training stage of ProxEmbed
    """
    model_options = locals().copy()

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)
    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(subpaths_file, maxlen_subpaths)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)
    trainingData, trainingPairs = dataProcessTools.getTrainingData(trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                      is_shuffle_for_batch)

    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    trainingParis, subPaths_matrix, subPaths_mask, subPaths_lens, wemb, cost = \
        proxEmbedModelMulti.proxEmbedModel(model_options, tparams)
    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingParis, subPaths_matrix,
                                       subPaths_mask, subPaths_lens, wemb, cost)
    print 'Start training models ......'

    best_p = None
    history_cost = []
    models_count = [0, 0, 0, 0]

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs[i] for i in batch]
            triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = \
                dataProcessTools.prepareDataForTraining(trainingDataForBatch,
                                                        trainingPairsForBatch, subpaths_map)
            cost = f_grad_shared(triples_matrix_data, subPaths_matrix_data,
                                 subPaths_mask_data, subPaths_lens_data, wordsEmbeddings)
            f_update(lrate)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'bad cost detected: ', cost
                return

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost
                print 'models_count ==', models_count

            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...'
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print 'Done'

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print 'Training finished! Cost time ==', end_time - start_time, 's'
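
# For reference: unzip(tparams) in the save block above is expected to pull the current
# numpy values out of the Theano shared variables so they can be passed to numpy.savez.
# The project defines unzip elsewhere; this is only a sketch of an implementation that is
# consistent with that use, not the project's own code.
def _sketch_unzip(zipped):
    from collections import OrderedDict
    new_params = OrderedDict()
    for key, value in zipped.items():
        new_params[key] = value.get_value()  # Theano shared variable -> numpy array
    return new_params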
def metagraphAttentionTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',  # full path of the training data file
        metagraphEmbeddings_path='',  # file path of the metagraph embeddings
        wordsEmbeddings_data=None,  # words embeddings
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',  # file path of the words embeddings
        wordsSize=1000000,  # size of the words vocabulary
        subpaths_map=None,  # contains the sub-paths
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',  # file which contains the sub-paths
        maxlen_subpaths=1000,  # max length of a sub-path
        maxlen=100,  # sequences longer than this are ignored
        batch_size=10,  # size of a training mini-batch
        is_shuffle_for_batch=True,  # whether to shuffle the training data when building batches
        objective_function_method='sigmoid',  # loss function; sigmoid is used here
        objective_function_param=0,  # parameter beta of the loss function
        lrate=0.0001,  # learning rate
        max_epochs=100,  # max number of training epochs
        dispFreq=5,  # display frequency
        saveFreq=5,  # frequency for saving the parameters
        saveto=main_dir + 'facebook/path2vec-modelParams.npz',  # path for saving the parameters; generated from main_dir, dataset_name, suffix, class_name and index
        # dimension parameters
        metagraph_embedding_dimension=10,  # metagraph embedding dimension
        dimension_A=10,  # attention dimension when computing the m-node embedding
        dimension_lstm=10,  # dimension of the LSTM parameters
        dimension_B=10,  # attention dimension when computing the m-path embedding
        dimension_C=10,  # attention dimension when computing the m-paths embedding
        # weight-decay parameters
        decay_Q_A=0.001,
        decay_b_A=0.001,
        decay_eta_A=0.001,
        decay_lstm_W=0.001,
        decay_lstm_U=0.001,
        decay_lstm_b=0.001,
        decay_Q_B=0.001,
        decay_b_B=0.001,
        decay_eta_B=0.001,
        decay_Q_C=0.001,
        decay_b_C=0.001,
        decay_eta_C=0.001,
        decay_w=0.001,
):
    # collect all parameters
    model_options = locals().copy()

    if wordsEmbeddings_data is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings_data, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)
    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPathsRomove0Path(
                subpaths_file, maxlen_subpaths, wordsEmbeddings_data)
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)
    metagraphEmbedding_data, metagraphDimension, metagraphSize = \
        dataProcessTools.getMetagraphEmbeddings(metagraphEmbeddings_path)
    trainingData, trainingPairs_data = dataProcessTools.getTrainingData(trainingDataFile)
    allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                      is_shuffle_for_batch)

    # init shared variables
    params = init_sharedVariables(model_options)
    tparams = init_tparams(params)
    print 'Generate models ......'
    metagraphEmbeddings, trainingParis, subPaths_matrix, subPaths_mask, wordsEmbeddings, cost = \
        subgraphAttentionModelLSTMBatch.metagraphAttentionModel(model_options, tparams)
    print 'Generate gradients ......'
    grads = tensor.grad(cost, wrt=list(tparams.values()))
    print 'Using Adadelta to generate functions ......'
    this_time = time.time()
    print 'Start to compile and optimize, time ==', time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(this_time))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = adadelta(lr, tparams, grads, metagraphEmbeddings,
                                       trainingParis, subPaths_matrix, subPaths_mask,
                                       wordsEmbeddings, cost)
    print 'Start training models ......'

    best_p = None
    history_cost = []  # not used

    start_time = time.time()
    print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    uidx = 0
    for eidx in range(max_epochs):
        for _, batch in allBatches:
            uidx += 1
            # prepare the data for this batch
            trainingDataForBatch = [trainingData[i] for i in batch]
            trainingPairsForBatch = [trainingPairs_data[i] for i in batch]
            triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = \
                dataProcessTools.prepareDataForTraining(trainingDataForBatch,
                                                        trainingPairsForBatch, subpaths_map)
            cost = f_grad_shared(metagraphEmbedding_data, triples_matrix_data,
                                 subPaths_matrix_data, subPaths_mask_data,
                                 wordsEmbeddings_data)
            f_update(lrate)

            # release the batch data
            trainingDataForBatch = None
            trainingPairsForBatch = None
            del triples_matrix_data
            del subPaths_matrix_data
            del subPaths_mask_data
            del subPaths_lens_data

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'bad cost detected: ', cost
                return

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost
                this_time = time.time()
                print 'Time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(this_time))

            if saveto and numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...'
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_cost, **params)
                pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                print 'Done'
            gc.collect()

    end_time = time.time()
    print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print 'Training finished! Cost time ==', end_time - start_time, 's'
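
# Illustrative driver (never called anywhere in this module): one way the training above
# could be invoked. The path values are placeholders, not files that ship with the
# repository, and should be replaced with the local data layout.
def _example_metagraphAttentionTraining_run():
    metagraphAttentionTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',
        metagraphEmbeddings_path=main_dir + 'facebook/metagraphEmbeddings',
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',
        batch_size=10,
        max_epochs=100,
        saveto=main_dir + 'facebook/metagraphAttention-modelParams.npz')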
def proxEmbedTraining(
        trainingDataFile=main_dir + 'facebook.splits/train.10/train_classmate_1',  # full path of the training data file
        wordsEmbeddings=None,  # words embeddings
        wordsEmbeddings_path=main_dir + 'facebook/nodesFeatures',  # file path of the words embeddings
        word_dimension=22,  # dimension of the words embeddings
        dimension=64,  # dimension of the path embeddings
        wordsSize=1000000,  # size of the words vocabulary
        subpaths_map=None,  # contains the sub-paths
        subpaths_file=main_dir + 'facebook/subpathsSaveFile',  # file which contains the sub-paths
        maxlen_subpaths=1000,  # max length of a sub-path
        h_output_method='mean-pooling',  # LSTM output for one path: "h" uses only the last hidden state, "mean-pooling" averages all h_i, "max-pooling" takes the element-wise max of all h_i
        maxlen=100,  # sequences longer than this are ignored
        batch_size=1,  # size of a training mini-batch
        is_shuffle_for_batch=False,  # whether to shuffle the training data when building batches
        discount_alpha=0.1,  # discount parameter alpha: the longer the sub-path, the smaller its weight
        subpaths_pooling_method='max-pooling',  # how to combine several sub-paths into one vector: "mean-pooling" or "max-pooling"
        objective_function_method='hinge-loss',  # loss function; hinge-loss is used here
        objective_function_param=0,  # parameter beta of the loss function
        lrate=0.0001,  # learning rate
        max_epochs=10,  # max number of training epochs
        dispFreq=5,  # display frequency
        saveFreq=5,  # frequency for saving the parameters
        saveto=main_dir + 'facebook/proxEmbed-modelParams.npz',  # path for saving the parameters; overwritten per group from main_dir, dataset_name, suffix, class_name and index
        # L2 regularization (weight decay) of the model parameters
        decay_lstm_W=0.01,
        decay_lstm_U=0.01,
        decay_lstm_b=0.01,
        decay_w=0.01,
        num_group=0,  # number of training splits (groups) to train on
        dataset_name="",
        class_name="",
        main_dir="",
):
    """
    The training stage of ProxEmbed
    """
    model_options = locals().copy()
    model_options.pop('wordsEmbeddings')
    print model_options

    if wordsEmbeddings is None:
        if wordsEmbeddings_path is not None:
            wordsEmbeddings, dimension, wordsSize = dataProcessTools.getWordsEmbeddings(
                wordsEmbeddings_path)
            # print("wordsEmbeddings:", wordsEmbeddings.shape, dimension, wordsSize)
        else:
            print 'There is no path for wordsEmbeddings, exit!!!'
            exit(0)
    if subpaths_map is None:
        if subpaths_file is not None:
            subpaths_map = dataProcessTools.loadAllSubPaths(subpaths_file, maxlen_subpaths)
            # print("subpaths_map:", len(subpaths_map))
        else:
            print 'There is no path for sub-paths, exit!!!'
            exit(0)

    cost_time = []
    for num_of_group in range(num_group):
        num_of_group += 1
        suffix = str(num_of_group)
        index = str(num_of_group)
        trainingDataFile = os.path.join(main_dir + '/', dataset_name + '.splits',
                                        'train.' + suffix, 'train_' + class_name + '_' + '1')
        saveto = os.path.join(main_dir + '/', dataset_name + '.trainModels',
                              'train.' + suffix, 'train_' + class_name + '_' + index + '.npz')
        trainingData, trainingPairs = dataProcessTools.getTrainingData(trainingDataFile)
        allBatches = dataProcessTools.get_minibatches_idx(len(trainingData), batch_size,
                                                          is_shuffle_for_batch)

        params = init_sharedVariables(model_options)
        tparams = init_tparams(params)
        print 'Generate models ......'
        trainingParis, subPaths_matrix, subPaths_mask, subPaths_lens, wemb, cost = \
            proxEmbedModelMulti.proxEmbedModel(model_options, tparams)
        print 'trainingParis:', type(trainingParis), trainingParis.shape
        print 'Generate gradients ......'
        grads = tensor.grad(cost, wrt=list(tparams.values()))
        print 'Using Adadelta to generate functions ......'
        lr = tensor.scalar(name='lr')
        f_grad_shared, f_update = adadelta(lr, tparams, grads, trainingParis,
                                           subPaths_matrix, subPaths_mask,
                                           subPaths_lens, wemb, cost)
        print 'Start training models ......'

        best_p = None
        history_cost = []
        models_count = [0, 0, 0, 0]

        start_time = time.time()
        print 'start time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
        uidx = 0
        for eidx in range(max_epochs):
            for _, batch in allBatches:
                uidx += 1
                trainingDataForBatch = [trainingData[i] for i in batch]
                trainingPairsForBatch = [trainingPairs[i] for i in batch]
                triples_matrix_data, subPaths_matrix_data, subPaths_mask_data, subPaths_lens_data = \
                    dataProcessTools.prepareDataForTraining(trainingDataForBatch,
                                                            trainingPairsForBatch, subpaths_map)
                cost = f_grad_shared(triples_matrix_data, subPaths_matrix_data,
                                     subPaths_mask_data, subPaths_lens_data, wordsEmbeddings)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'bad cost detected: ', cost
                    return

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch =', eidx, ', Update =', uidx, ', Cost =', cost
                    print 'models_count ==', models_count

                if saveto and numpy.mod(uidx, saveFreq) == 0:
                    print 'Saving...'
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_cost, **params)
                    pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                    print 'Done'

        end_time = time.time()
        print 'end time ==', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
        print 'Training finished! Cost time ==', end_time - start_time, 's'
        cost_time.append(end_time - start_time)
    return cost_time
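
# Example invocation of the multi-group training above (illustrative only; the argument
# values are placeholders based on how trainingDataFile and saveto are built in the loop:
# <main_dir>/<dataset>.splits/train.<g>/train_<class>_1 for input and
# <main_dir>/<dataset>.trainModels/train.<g>/train_<class>_<g>.npz for output).
#
# cost_time = proxEmbedTraining(
#     wordsEmbeddings_path='data/facebook/nodesFeatures',
#     subpaths_file='data/facebook/subpathsSaveFile',
#     num_group=10,
#     dataset_name='facebook',
#     class_name='classmate',
#     main_dir='data')
# print 'per-group training time in seconds ==', cost_time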