# Initialization: imports and global configuration for the FashionMNIST run.
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
import pdb

torch.set_printoptions(linewidth=120)

# Run on the first GPU when available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001

# Download Dataset.
# download=True fetches the data on the first run instead of crashing when
# root does not already contain it; subsequent runs reuse the local copy.
# (Removed a block of dead commented-out code and a stray C-style `*/`
# terminator that could break parsing.)
train_set = torchvision.datasets.FashionMNIST(root='../../data/', train=True,
                                              download=True,
                                              transform=transforms.ToTensor())
test_set = torchvision.datasets.FashionMNIST(root='../../data/', train=False,
                                             download=True,
                                             transform=transforms.ToTensor())
def __init__(self, topology, hyperparameters, configuration, dataset):
    """Validate settings, then build and summarize the network.

    Parameters
    ----------
    topology : dict
        'layer sizes' (list of positive int), 'network type'
        ('MLFF' | 'SW_intra' | 'SW_no_intra'); small-world types also
        require 'bypass p' in [0, 1] and a non-negative 'bypass mag'.
    hyperparameters : dict
        'learning rate' (float, or one float per weight matrix),
        'epsilon', 'beta' (positive floats), 'free iterations' and
        'weakly clamped iterations' (positive ints).
    configuration : dict
        'batch size' (positive int), 'device', 'seed' (non-negative int).
    dataset : class
        One of the supported dataset classes; instantiated with
        (batch_size, device).

    Raises
    ------
    AssertionError
        On any malformed setting.
    """
    np.set_printoptions(precision=2, linewidth=320)
    torch.set_printoptions(precision=2, linewidth=320)
    print('Parsing network settings...')
    t0 = 1e9 * time.time()
    # BUGFIX: the original read `type(topology['layer sizes'] == list)`,
    # which asserts the *type of a boolean* and therefore always passes.
    assert type(topology['layer sizes']) == list
    for ls in topology['layer sizes']:
        assert type(ls) == int
        assert ls > 0
    self.layer_sizes = topology['layer sizes']
    assert topology['network type'] in ['MLFF', 'SW_intra', 'SW_no_intra']
    self.network_type = topology['network type']
    if self.network_type in ['SW_intra', 'SW_no_intra']:
        assert type(topology['bypass p']) == float
        assert 0 <= topology['bypass p'] <= 1
        assert type(topology['bypass mag']) == float
        assert topology['bypass mag'] >= 0
        self.bypass_p = topology['bypass p']
        self.bypass_mag = topology['bypass mag']
    elif self.network_type in ['MLFF']:
        # Multilayer feed-forward has no bypass connections.
        self.bypass_p = None
        self.bypass_mag = None
    else:
        assert False
    # A single global learning rate, or one rate per weight matrix.
    if type(hyperparameters['learning rate']) == float:
        assert hyperparameters['learning rate'] > 0
        self.learning_rate = hyperparameters['learning rate']
    elif type(hyperparameters['learning rate']) == list:
        for lr in hyperparameters['learning rate']:
            assert type(lr) == float
            assert lr > 0
        assert len(hyperparameters['learning rate']) == (len(self.layer_sizes) - 1)
        self.learning_rate = hyperparameters['learning rate']
    else:
        assert False
    assert type(hyperparameters['epsilon']) == float
    assert hyperparameters['epsilon'] > 0
    self.epsilon = hyperparameters['epsilon']
    assert type(hyperparameters['beta']) == float
    assert hyperparameters['beta'] > 0
    self.beta = hyperparameters['beta']
    assert type(hyperparameters['free iterations']) == int
    assert hyperparameters['free iterations'] > 0
    self.free_iterations = hyperparameters['free iterations']
    assert type(hyperparameters['weakly clamped iterations']) == int
    assert hyperparameters['weakly clamped iterations'] > 0
    self.weakly_clamped_iterations = hyperparameters['weakly clamped iterations']
    assert type(configuration['batch size']) == int
    assert configuration['batch size'] > 0
    self.batch_size = configuration['batch size']
    assert configuration['device'] in ['cpu', 'CUDA:0', 'cuda']
    self.device = configuration['device']
    assert dataset in [
        datasets.MNIST, datasets.FashionMNIST, datasets.Diabetes, datasets.Wine
    ]
    assert type(configuration['seed']) == int
    assert configuration['seed'] >= 0
    self.seed = configuration['seed']
    print('\tCompleted successfully')
    print('\tTime taken: %s' % (ttos((1e9 * time.time()) - t0)))
    print('Initializing network...')
    t0 = 1e9 * time.time()
    print('\tInitializing indices...')
    t1 = 1e9 * time.time()
    # Neurons of all layers live in one flat vector; these slices address
    # the input (ix), hidden (ih), output (iy) and hidden+output (ihy) parts.
    self.layer_indices = np.cumsum([0] + self.layer_sizes)
    self.num_neurons = np.sum(self.layer_sizes)
    self.ix = slice(0, self.layer_indices[1])
    self.ih = slice(self.layer_indices[1], self.layer_indices[-2])
    self.iy = slice(self.layer_indices[-2], self.layer_indices[-1])
    self.ihy = slice(self.layer_indices[1], self.layer_indices[-1])
    print('\t\tCompleted successfully')
    print('\t\tTime taken: %s' % (ttos((1e9 * time.time()) - t1)))
    print('\tInitializing seeds...')
    t1 = 1e9 * time.time()
    torch.manual_seed(seed=self.seed)
    np.random.seed(seed=self.seed)
    print('\t\tCompleted successfully')
    print('\t\tTime taken: %s' % (ttos((1e9 * time.time()) - t1)))
    print('\tInitializing dataset...')
    t1 = 1e9 * time.time()
    self.dataset = dataset(self.batch_size, self.device)
    print('\t\tCompleted successfully.')
    print('\t\tTime taken: %s' % (ttos((1e9 * time.time()) - t1)))
    print('\tInitializing state...')
    t1 = 1e9 * time.time()
    self.initialize_state()
    print('\t\tCompleted successfully')
    print('\t\tTime taken: %s' % (ttos((1e9 * time.time()) - t1)))
    print('\tInitializing persistent particles...')
    t1 = 1e9 * time.time()
    self.initialize_persistent_particles()
    print('\t\tCompleted successfully')
    print('\t\tTime taken: %s' % (ttos((1e9 * time.time()) - t1)))
    print('\tInitializing weights...')
    t1 = 1e9 * time.time()
    self.initialize_weight_matrices()
    print('\t\tCompleted successfully')
    print('\t\tTime taken: %s' % (ttos((1e9 * time.time()) - t1)))
    print('\tInitializing biases...')
    t1 = 1e9 * time.time()
    self.initialize_biases()
    print('\t\tCompleted successfully')
    print('\t\tTime taken: %s' % (ttos((1e9 * time.time()) - t1)))
    print('\tCompleted successfully.')
    print('\tTime taken: %s' % (ttos((1e9 * time.time()) - t0)))
    # Human-readable summary of everything that was just configured.
    print('Network initialized successfully.')
    print('\tLayer sizes: %s' % ('-'.join([str(val) for val in self.layer_sizes])))
    print('\tNetwork type: %s' % (self.network_type))
    print('\tBypass p: ' + ('n/a' if (self.bypass_p is None) else '%f' % (self.bypass_p)))
    print('\tBypass magnitude: ' + ('n/a' if (self.bypass_mag is None) else '%f' % (self.bypass_mag)))
    print('\tLearning rate:', self.learning_rate)
    if type(self.learning_rate) == list:
        print('\t\tUsing per-layer rates.')
    elif type(self.learning_rate) == float:
        print('\t\tUsing a single global learning rate.')
    else:
        assert False
    print('\tEpsilon: %f' % (self.epsilon))
    print('\tBeta: %f' % (self.beta))
    print('\tFree iterations: %d' % (self.free_iterations))
    print('\tWeakly-clamped iterations: %d' % (self.weakly_clamped_iterations))
    print('\tDataset: %s' % (self.dataset.name))
    print('\t\tInput: %d' % (self.dataset.n_in))
    print('\t\tOutput: %d' % (self.dataset.n_out))
    print('\t\tTraining batches: %d' % (self.dataset.n_trainb))
    print('\t\tTest batches: %d' % (self.dataset.n_testb))
    print('\t\tBatch size: %d' % (self.dataset.batch_size))
    print('\t\tClassification: %r' % (self.dataset.classification))
    print('\tBatch size: %d' % (self.batch_size))
    print('\tDevice: %s' % (self.device))
    print('\tSeed: %d' % (self.seed))
    print('\tState:')
    print('\t\tRMS value: %f' % (rms(self.s)))
    print('\t\tShape: ' + ' x '.join([str(val) for val in list(self.s.shape)]))
    print('\tPersistent particles:')
    print('\t\tNumber of persistent particles: %d' % (len(self.persistent_particles)))
    print('\t\tMax RMS persistent particle: %f' % (np.max([rms(pp) for pp in self.persistent_particles])))
    # All persistent particles must share a shape for the summary to be valid.
    for pp in self.persistent_particles:
        assert pp.shape == self.persistent_particles[0].shape
    print('\t\tShape: ' + ' x '.join([str(val) for val in self.persistent_particles[0].shape]))
    print('\tWeight matrices:')
    print('\t\tActual p: %.03f' % (self.p_actual))
    print('\t\tRMS W element: %f' % (rms(self.W)))
    print('\t\tRMS W_mask element: %f' % (rms(self.W_mask)))
    print('\t\tW shape: ' + ' x '.join([str(val) for val in self.W.shape]))
    print('\t\tW_mask shape: ' + ' x '.join([str(val) for val in self.W_mask.shape]))
    for conn in self.interlayer_connections:
        assert conn.shape == self.interlayer_connections[0].shape
    print('\t\tInterlayer connection mask shape: ' + ' x '.join(
        [str(val) for val in self.interlayer_connections[0].shape]))
import numpy as np
import torch
# Print full tensors (no truncation) when debugging.
torch.set_printoptions(profile="full")
import torch.nn as nn
from GAS.layer import DynamicLSTM
import torch.nn.functional as F


# Define the network structure (translated from the original Chinese comment).
class Modelmy(nn.Module):

    def __init__(self, config, glove_inti_embedding, auxiliary_metrix, gloss_id):
        """Store configuration values, auxiliary index/mask tensors and gloss ids.

        NOTE(review): every auxiliary tensor is moved to the hard-coded
        device "cuda", so this class crashes on CPU-only machines —
        confirm whether the device should come from `config`.
        """
        super(Modelmy, self).__init__()
        '''config'''
        # self.batch_size = config.batch_size
        self.n_step_f = config.n_step_f
        self.n_step_b = config.n_step_b
        self.embedding_size = config.embedding_size
        self.max_n_sense = config.max_n_sense
        self.max_gloss_words = config.max_gloss_words
        self.drop_out = config.dropout
        self.HD = config.LSTMHD  # emp_shape  # 300=>100
        # Shape comments below are from the original author; presumably the
        # dataset-specific table sizes — TODO confirm against the data loader.
        self.sense_to_gloss_id = auxiliary_metrix[0].to("cuda")  # (2979)
        self.word_to_sense_id = auxiliary_metrix[1].to("cuda")  # (653,10)
        self.gloss_to_word_id = auxiliary_metrix[2].to("cuda")  # (2700,6)
        self.gloss_to_word_mask = auxiliary_metrix[3].to("cuda")  # (2700,100)
        self.sense_mask = auxiliary_metrix[4].to("cuda")  # (2700,6,10)
        self.hook_mask = self.sense_mask
        self.init_alpha = auxiliary_metrix[5].to("cuda")  # (2700,6,10)
        self.gloss_id = gloss_id
        '''embedding'''
forces: Optional[Tensor] = None # noqa: E701 hessians: Optional[Tensor] = None if return_forces or return_hessians: grad = torch.autograd.grad([energies.sum()], [coordinates], create_graph=return_hessians)[0] assert grad is not None forces = -grad if return_hessians: hessians = torchani.utils.hessian(coordinates, forces=forces) return energies, forces, hessians custom_model = CustomModule() compiled_custom_model = torch.jit.script(custom_model) torch.jit.save(compiled_custom_model, 'compiled_custom_model.pt') loaded_compiled_custom_model = torch.jit.load('compiled_custom_model.pt') energies, forces, hessians = custom_model(species, coordinates, True, True) energies_jit, forces_jit, hessians_jit = loaded_compiled_custom_model( species, coordinates, True, True) print('Energy, eager mode vs loaded jit:', energies.item(), energies_jit.item()) print() print('Force, eager mode vs loaded jit:\n', forces.squeeze(0), '\n', forces_jit.squeeze(0)) print() torch.set_printoptions(sci_mode=False, linewidth=1000) print('Hessian, eager mode vs loaded jit:\n', hessians.squeeze(0), '\n', hessians_jit.squeeze(0))
import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import random  # BUGFIX: init_seeds() below uses random.seed but random was never imported
import torch
import torch.nn as nn
import torchvision
from tqdm import tqdm

from . import torch_utils  # , google_utils

matplotlib.rc('font', **{'size': 11})

# Set printoptions
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5

# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)


def floatn(x, n=3):
    """Format x to n decimal places and return it as a float."""
    return float(format(x, '.%gf' % n))


def init_seeds(seed=0):
    """Seed the stdlib and numpy RNGs for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
def main():
    """CLI entry point: parse arguments, build data/model, then train or test.

    Reads datasets and a vocabulary from disk, builds a GloVe embedding and a
    Transformer extractive-summarization model, and dispatches to
    setup_training() or run_test() depending on --mode.
    """
    parser = argparse.ArgumentParser(description='Summarization Model')
    # Where to find data
    parser.add_argument('--data_path', type=str, default='/remote-home/dqwang/Datasets/CNNDM/train.label.jsonl', help='Path expression to pickle datafiles.')
    parser.add_argument('--valid_path', type=str, default='/remote-home/dqwang/Datasets/CNNDM/val.label.jsonl', help='Path expression to pickle valid datafiles.')
    parser.add_argument('--vocab_path', type=str, default='/remote-home/dqwang/Datasets/CNNDM/vocab', help='Path expression to text vocabulary file.')
    # Important settings
    parser.add_argument('--mode', choices=['train', 'test'], default='train', help='must be one of train/test')
    parser.add_argument('--embedding', type=str, default='glove', choices=['word2vec', 'glove', 'elmo', 'bert'], help='must be one of word2vec/glove/elmo/bert')
    parser.add_argument('--sentence_encoder', type=str, default='transformer', choices=['bilstm', 'deeplstm', 'transformer'], help='must be one of LSTM/Transformer')
    parser.add_argument('--sentence_decoder', type=str, default='SeqLab', choices=['PN', 'SeqLab'], help='must be one of PN/SeqLab')
    parser.add_argument('--restore_model', type=str, default='None', help='Restore model for further training. [bestmodel/bestFmodel/earlystop/None]')
    # Where to save output
    parser.add_argument('--save_root', type=str, default='save/', help='Root directory for all model.')
    parser.add_argument('--log_root', type=str, default='log/', help='Root directory for all logging.')
    # Hyperparameters
    parser.add_argument('--gpu', type=str, default='0', help='GPU ID to use. For cpu, set -1 [default: -1]')
    parser.add_argument('--cuda', action='store_true', default=False, help='use cuda')
    parser.add_argument('--vocab_size', type=int, default=100000, help='Size of vocabulary. These will be read from the vocabulary file in order. If the vocabulary file contains fewer words than this number, or if this number is set to 0, will take all words in the vocabulary file.')
    parser.add_argument('--n_epochs', type=int, default=20, help='Number of epochs [default: 20]')
    parser.add_argument('--batch_size', type=int, default=32, help='Mini batch size [default: 128]')
    parser.add_argument('--word_embedding', action='store_true', default=True, help='whether to use Word embedding')
    parser.add_argument('--embedding_path', type=str, default='/remote-home/dqwang/Glove/glove.42B.300d.txt', help='Path expression to external word embedding.')
    parser.add_argument('--word_emb_dim', type=int, default=300, help='Word embedding size [default: 200]')
    parser.add_argument('--embed_train', action='store_true', default=False, help='whether to train Word embedding [default: False]')
    parser.add_argument('--min_kernel_size', type=int, default=1, help='kernel min length for CNN [default:1]')
    parser.add_argument('--max_kernel_size', type=int, default=7, help='kernel max length for CNN [default:7]')
    parser.add_argument('--output_channel', type=int, default=50, help='output channel: repeated times for one kernel')
    parser.add_argument('--use_orthnormal_init', action='store_true', default=True, help='use orthnormal init for lstm [default: true]')
    parser.add_argument('--sent_max_len', type=int, default=100, help='max length of sentences (max source text sentence tokens)')
    parser.add_argument('--doc_max_timesteps', type=int, default=50, help='max length of documents (max timesteps of documents)')
    parser.add_argument('--save_label', action='store_true', default=False, help='require multihead attention')
    # Training
    parser.add_argument('--lr', type=float, default=0.0001, help='learning rate')
    parser.add_argument('--lr_descent', action='store_true', default=False, help='learning rate descent')
    parser.add_argument('--warmup_steps', type=int, default=4000, help='warmup_steps')
    parser.add_argument('--grad_clip', action='store_true', default=False, help='for gradient clipping')
    parser.add_argument('--max_grad_norm', type=float, default=10, help='for gradient clipping max gradient normalization')
    # test
    parser.add_argument('-m', type=int, default=3, help='decode summary length')
    parser.add_argument('--limited', action='store_true', default=False, help='limited decode summary length')
    parser.add_argument('--test_model', type=str, default='evalbestmodel', help='choose different model to test [evalbestmodel/evalbestFmodel/trainbestmodel/trainbestFmodel/earlystop]')
    parser.add_argument('--use_pyrouge', action='store_true', default=False, help='use_pyrouge')

    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.set_printoptions(threshold=50000)

    # File paths
    DATA_FILE = args.data_path
    VALID_FILE = args.valid_path
    # NOTE(review): "VOCAL" looks like a typo for "VOCAB" — local name only.
    VOCAL_FILE = args.vocab_path
    LOG_PATH = args.log_root

    # train_log setting: the log directory is only ever created in train mode.
    if not os.path.exists(LOG_PATH):
        if args.mode == "train":
            os.makedirs(LOG_PATH)
        else:
            logger.exception("[Error] Logdir %s doesn't exist. Run in train mode to create it.", LOG_PATH)
            raise Exception("[Error] Logdir %s doesn't exist. Run in train mode to create it." % (LOG_PATH))
    nowTime = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    log_path = os.path.join(LOG_PATH, args.mode + "_" + nowTime)
    file_handler = logging.FileHandler(log_path)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    logger.info("Pytorch %s", torch.__version__)

    sum_loader = SummarizationLoader()
    # hps aliases args: attribute writes below mutate the parsed namespace.
    hps = args

    if hps.mode == 'test':
        paths = {"test": DATA_FILE}
        # Disable all dropout at test time.
        hps.recurrent_dropout_prob = 0.0
        hps.atten_dropout_prob = 0.0
        hps.ffn_dropout_prob = 0.0
        logger.info(hps)
    else:
        paths = {"train": DATA_FILE, "valid": VALID_FILE}

    dataInfo = sum_loader.process(paths=paths, vocab_size=hps.vocab_size, vocab_path=VOCAL_FILE, sent_max_len=hps.sent_max_len, doc_max_timesteps=hps.doc_max_timesteps, load_vocab=os.path.exists(VOCAL_FILE))

    if args.embedding == "glove":
        vocab = dataInfo.vocabs["vocab"]
        embed = torch.nn.Embedding(len(vocab), hps.word_emb_dim)
        if hps.word_embedding:
            embed_loader = EmbedLoader()
            # Words not found in the pretrained file keep their random init.
            pretrained_weight = embed_loader.load_with_vocab(hps.embedding_path, vocab)  # unfound with random init
            embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
            embed.weight.requires_grad = hps.embed_train
    else:
        logger.error("[ERROR] embedding To Be Continued!")
        sys.exit(1)

    if args.sentence_encoder == "transformer" and args.sentence_decoder == "SeqLab":
        # Merge model-architecture settings from the config file into hps.
        model_param = json.load(open("config/transformer.config", "rb"))
        hps.__dict__.update(model_param)
        model = TransformerModel(hps, embed)
    else:
        logger.error("[ERROR] Model To Be Continued!")
        sys.exit(1)
    logger.info(hps)

    if hps.cuda:
        model = model.cuda()
        logger.info("[INFO] Use cuda")

    if hps.mode == 'train':
        dataInfo.datasets["valid"].set_target("text", "summary")
        setup_training(model, dataInfo.datasets["train"], dataInfo.datasets["valid"], hps)
    elif hps.mode == 'test':
        logger.info("[INFO] Decoding...")
        dataInfo.datasets["test"].set_target("text", "summary")
        run_test(model, dataInfo.datasets["test"], hps, limited=hps.limited)
    else:
        # NOTE(review): unreachable — argparse restricts --mode to train/test,
        # and the message mentions an 'eval' mode that does not exist.
        logger.error("The 'mode' flag must be one of train/eval/test")
        raise ValueError("The 'mode' flag must be one of train/eval/test")
import numpy as np
import math
import torch as tf
import torch.utils.data
import torch.nn as nn
import torch.optim as optim
import os
import gc
import time
from ctypes import *
from class_and_function import *

# Use double precision for all newly created tensors.
default_dtype = tf.float64
tf.set_default_dtype(default_dtype)
tf.set_printoptions(precision=10)

# NOTE(review): the file aliases torch as `tf`, but this line spells out
# `torch` (bound via the `import torch.utils.data` above). It works, but the
# mixed spelling is confusing — consider using one name consistently.
device = tf.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = tf.device('cpu')
if (device != tf.device('cpu')):
    print("cuDNN version: ", tf.backends.cudnn.version())
    # tf.backends.cudnn.enabled = False
    #tf.backends.cudnn.benchmark = True
    # One worker/replica per visible GPU.
    MULTIPLIER = tf.cuda.device_count()
else:
    MULTIPLIER = 1
#if (hvd.rank() == 0):
if (True):
    # Truncate (create empty) the loss log file for this run.
    f_out = open("./LOSS.OUT", "w")
    f_out.close()
import model_utils import utils import data_utils import loss import check_point import argparse import torch import torch.nn as nn from torch import optim import numpy as np import scipy.sparse as sp import random import os torch.set_printoptions(precision=8) parser = argparse.ArgumentParser(description='Train model') parser.add_argument('--batch_size', type=int, help='batch size of data set (default:32)', default=32) parser.add_argument('--rnn_units', type=int, help='number units of hidden size lstm', default=16) parser.add_argument('--rnn_layers', type=int, help='number layers of RNN', default=1) parser.add_argument('--num_channels',
def setup(seed):
    """Seed all RNGs and force deterministic, reproducible execution.

    Disables cuDNN autotuning (benchmark) and enables deterministic
    kernels, then widens torch's print options for debugging output.
    """
    seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    torch.set_printoptions(precision=5, linewidth=1000)
import argparse
import os
import sys
import traceback
import math
import time
from datetime import datetime
from enum import IntEnum
from copy import deepcopy as dcp

from mpi4py import MPI

import torch
# Wide, verbose tensor printing for debugging.
torch.set_printoptions(precision = 4, threshold = 5000, edgeitems = 5, linewidth = 160)
# NOTE(review): set_start_method raises RuntimeError if a start method was
# already chosen for this process (e.g. if this module is imported after
# another library set it) — confirm single import, or pass force=True.
torch.multiprocessing.set_start_method('spawn')
import torch.nn.functional as tnf
import torchvision
from tensorboardX import SummaryWriter

import cortex.random as Rand
import cortex.statistics as Stat
import cortex.functions as Func
import cortex.containers as Cont
import cortex.network as cn
import cortex.layer as cl
import cortex.species as cs
def test_rrpn(self):
    """Regression test for the rotated RPN (RRPN) proposal generator.

    Runs a seeded forward pass on random images/features with rotated
    ground-truth boxes, then compares losses, proposal boxes and
    objectness logits against golden values recorded from a reference run.
    """
    torch.manual_seed(121)
    cfg = get_cfg()
    # Configure a rotated-anchor RPN with a small anchor grid.
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]]
    cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]]
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    backbone = build_backbone(cfg)
    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    # Two images of different sizes, one small feature map.
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    image_shape = (15, 15)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    # Rotated GT boxes: (cx, cy, w, h, angle).
    gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
    gt_instances = Instances(image_shape)
    gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, [gt_instances[0], gt_instances[1]])

    # Golden loss values from the reference run.
    expected_losses = {
        "loss_rpn_cls": torch.tensor(0.0432923734),
        "loss_rpn_loc": torch.tensor(0.1552739739),
    }
    for name in expected_losses.keys():
        assert torch.allclose(proposal_losses[name], expected_losses[name])

    # Golden proposals (cx, cy, w, h, angle) per image.
    expected_proposal_boxes = [
        RotatedBoxes(
            torch.tensor([
                [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873],
                [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475],
                [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040],
                [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227],
                [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738],
                [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409],
                [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737],
                [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970],
                [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134],
                [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086],
                [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125],
                [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789],
            ])),
        RotatedBoxes(
            torch.tensor([
                [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899],
                [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234],
                [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494],
                [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994],
                [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251],
                [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217],
                [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078],
                [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463],
                [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767],
                [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884],
                [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270],
                [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991],
                [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784],
                [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201],
            ])),
    ]

    expected_objectness_logits = [
        torch.tensor([
            0.10111768, 0.09112845, 0.08466332, 0.07589971, 0.06650183,
            0.06350251, 0.04299347, 0.01864817, 0.00986163, 0.00078543,
            -0.04573630, -0.04799230,
        ]),
        torch.tensor([
            0.11373727, 0.09377633, 0.05281663, 0.05143715, 0.04040275,
            0.03250912, 0.01307789, 0.01177734, 0.00038105, -0.00540255,
            -0.01194804, -0.01461012, -0.03061717, -0.03599222,
        ]),
    ]

    torch.set_printoptions(precision=8, sci_mode=False)

    for i in range(len(image_sizes)):
        assert len(proposals[i]) == len(expected_proposal_boxes[i])
        assert proposals[i].image_size == (image_sizes[i][0], image_sizes[i][1])
        # It seems that there's some randomness in the result across different machines:
        # This test can be run on a local machine for 100 times with exactly the same result,
        # However, a different machine might produce slightly different results,
        # thus the atol here.
        err_msg = "computed proposal boxes = {}, expected {}".format(
            proposals[i].proposal_boxes.tensor, expected_proposal_boxes[i].tensor)
        assert torch.allclose(proposals[i].proposal_boxes.tensor,
                              expected_proposal_boxes[i].tensor, atol=1e-5), err_msg
        err_msg = "computed objectness logits = {}, expected {}".format(
            proposals[i].objectness_logits, expected_objectness_logits[i])
        assert torch.allclose(proposals[i].objectness_logits,
                              expected_objectness_logits[i], atol=1e-5), err_msg
def main():
    """Entry point for the apex mixed-precision ImageNet training example.

    Parses args, builds the model/optimizer wrapped by apex.amp (and DDP
    when distributed), optionally resumes from a checkpoint, then runs the
    train/validate loop and checkpoints the best top-1 accuracy.
    """
    global best_prec1, args

    args = parse()
    print("opt_level = {}".format(args.opt_level))
    print("keep_batchnorm_fp32 = {}".format(args.keep_batchnorm_fp32),
          type(args.keep_batchnorm_fp32))
    print("loss_scale = {}".format(args.loss_scale), type(args.loss_scale))
    print("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))

    cudnn.benchmark = True
    best_prec1 = 0
    if args.deterministic:
        # Trade autotuning speed for reproducible kernels.
        cudnn.benchmark = False
        cudnn.deterministic = True
        torch.manual_seed(args.local_rank)
        torch.set_printoptions(precision=10)

    # Launchers (torch.distributed.launch) export WORLD_SIZE.
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    args.gpu = 0
    args.world_size = 1
    if args.distributed:
        args.gpu = args.local_rank
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    if args.channels_last:
        memory_format = torch.channels_last
    else:
        memory_format = torch.contiguous_format

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.sync_bn:
        import apex
        print("using apex synced BN")
        model = apex.parallel.convert_syncbn_model(model)

    model = model.cuda().to(memory_format=memory_format)

    # Scale learning rate based on global batch size
    args.lr = args.lr * float(args.batch_size * args.world_size) / 256.
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Initialize Amp. Amp accepts either values or strings for the optional
    # override arguments, for convenient interoperation with argparse.
    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=args.opt_level,
                                      keep_batchnorm_fp32=args.keep_batchnorm_fp32,
                                      loss_scale=args.loss_scale)

    # For distributed training, wrap the model with
    # apex.parallel.DistributedDataParallel. This must be done AFTER the call
    # to amp.initialize: wrapping first would let amp.initialize alter the
    # parameter types in a way that disrupts DDP's allreduce hooks.
    if args.distributed:
        # delay_allreduce delays all communication to the end of the backward pass.
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        # Use a local scope to avoid dangling references
        def resume():
            # BUGFIX: without this `global`, the assignment below bound
            # best_prec1 to a throwaway local and the restored best accuracy
            # was silently discarded.
            global best_prec1
            if os.path.isfile(args.resume):
                print("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(
                    args.resume,
                    map_location=lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint['epoch']
                best_prec1 = checkpoint['best_prec1']
                model.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(args.resume))
        resume()

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    if(args.arch == "inception_v3"):
        raise RuntimeError("Currently, inception_v3 is not supported by this example.")
        # crop_size = 299
        # val_size = 320  # I chose this value arbitrarily, we can adjust.
    else:
        crop_size = 224
        val_size = 256

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(crop_size),
            transforms.RandomHorizontalFlip(),
            # transforms.ToTensor(), Too slow
            # normalize,
        ]))
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(val_size),
        transforms.CenterCrop(crop_size),
    ]))

    train_sampler = None
    val_sampler = None
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    # fast_collate builds uint8 batches; normalization happens on the GPU.
    collate_fn = lambda b: fast_collate(b, memory_format)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True,
        sampler=train_sampler, collate_fn=collate_fn)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True,
        sampler=val_sampler, collate_fn=collate_fn)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint (rank 0 only)
        if args.local_rank == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
# if torch.cuda.device_count() > 1: # print("Let's use", torch.cuda.device_count(), "GPUs!") # # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs # net = nn.DataParallel(net) # net.to(device) from __future__ import absolute_import, division, print_function, unicode_literals import sys import torch import PIL,os,mimetypes from exp.nb_07a import * torch.Tensor.ndim = property(lambda x: len(x.shape)) torch.set_printoptions(linewidth=300, precision=4, sci_mode=False) def setify(o):return o if isinstance(o,set) else set(listify(o)) def _get_files(path, fs, extensions=None): path = Path(path) if extensions is None: extensions = set(k for k, v in mimetypes.types_map.items() if v.startswith('image/')) res = [path / f for f in fs if not f.startswith('.') and ((not extensions) or f'.{f.split(".")[-1].lower()}' in extensions)] return res def get_files(path, extensions=None, recurse=False, include=None): path = Path(path) extensions = setify(extensions)
'''Encode object boxes and labels.''' import math import torch from torch.nn import functional as F from utils import meshgrid, calculate_iou, nms, change_box_order torch.set_printoptions(profile='full') class DataEncoder(object): def __init__(self, use_gpu=False): self.FloatTensor = torch.cuda.FloatTensor if use_gpu else torch.FloatTensor self.LongTensor = torch.cuda.LongTensor if use_gpu else torch.LongTensor self.anchor_areas = [ 24 * 24., 48 * 48., 96 * 96., 256 * 256., 512 * 512. ] # p3 -> p7 self.aspect_ratios = [1 / 5., 1 / 1., 4 / 1.] self.scale_ratios = [1., pow(2, 1 / 3.), pow(2, 2 / 3.)] self.anchor_wh = self._get_anchor_wh() def _get_anchor_wh(self): """ Compute anchor width and height for each feature map. Returns: anchor_wh: (tensor) anchor wh, sized [#fm, #anchors_per_cell, 2]. """ anchor_wh = [] for s in self.anchor_areas:
import re
import traceback
import datetime
import sys  # harmless if already imported above; needed for sys.maxsize below
from typing import Dict, Optional, List, Tuple, Union, Any, Set

# NOTE(review): `os` and `logging` are used below but not imported in this
# excerpt — presumably imported earlier in the file; confirm.
sys.path.append(os.getcwd())
import json
import time
import random
import tqdm
import numpy
# BUGFIX: numpy >= 1.14 raises "threshold must be numeric and non-NAN" for
# set_printoptions(threshold=numpy.nan); sys.maxsize is the documented way
# to request unabridged array printing.
numpy.set_printoptions(threshold=sys.maxsize)
import torch
torch.set_printoptions(threshold=5000)
import torch.optim.lr_scheduler
from torch.nn.parallel import replicate, parallel_apply
from torch.nn.parallel.scatter_gather import scatter_kwargs, gather
from tensorboardX import SummaryWriter
from torch import optim
from torch import nn

import allen_model_pytorch as cm
import coref_model_dataset_2 as cmdata
import util

logger = logging.getLogger(__name__)

# if __name__ == "__main__":
#     config = util.get_config("experiments.conf")['best']
import time import math as math import random import torchvision import torchvision.transforms as transforms from torch.optim.swa_utils import AveragedModel, SWALR from torch.optim.lr_scheduler import CosineAnnealingLR from clearml import Task from clearml.automation import UniformParameterRange, UniformIntegerParameterRange from clearml.automation import HyperParameterOptimizer from clearml.automation.optuna import OptimizerOptuna from torch.utils.tensorboard import SummaryWriter import pickle import pytorch_lightning as pl tensorboard_writer = SummaryWriter('./tensorboard_logs') torch.set_printoptions(precision=5, sci_mode=False, threshold=1000) torch.set_default_tensor_type(torch.DoubleTensor) print('\npl version:', pl.__version__) class LitModel(nn.Module): NLL = None F = None muE = None mu_sa = None initD = None mdp_data = None truep = None learned_feature_weights = None configuration_dict = None
def main():
    """Distributed ImageNet training driver (HeAT/DASO + NVIDIA DALI).

    Parses CLI args, configures (optional) multi-GPU process groups, builds
    model/optimizer/scheduler, optionally resumes from a checkpoint, then
    runs the train/validate epoch loop while recording benchmark results.

    NOTE(review): reconstructed from whitespace-mangled source; the nesting
    of a few statements (e.g. `args.distributed = True`, benchmark-dict
    appends under rank 0) was inferred -- confirm against the original file.
    """
    global best_prec1, args
    best_prec1 = 0
    args = parse()  # todo: remove??

    # test mode, use default args for sanity test
    if args.test:
        args.epochs = 1
        args.start_epoch = 0
        args.arch = "resnet50"
        args.batch_size = 64
        args.data = []
        print0("Test mode - no DDP, no apex, RN50, 10 iterations")

    args.distributed = True  # TODO: DDDP: if ht.MPI_WORLD.size > 1 else False

    print0("loss_scale = {}".format(args.loss_scale), type(args.loss_scale))
    print0("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))
    cudnn.benchmark = True
    best_prec1 = 0  # todo: remove?
    if args.deterministic:
        # Reproducible runs: disable the cudnn autotuner, seed per rank.
        cudnn.benchmark = False
        cudnn.deterministic = True
        torch.manual_seed(ht.MPI_WORLD.rank)
        torch.set_printoptions(precision=10)
        print0("deterministic==True, seed set to global rank")
    else:
        torch.manual_seed(999999999)

    args.gpu = 0
    args.world_size = ht.MPI_WORLD.size
    args.rank = ht.MPI_WORLD.rank
    rank = args.rank
    device = torch.device("cpu")
    if torch.cuda.device_count() > 1:
        # Several GPUs visible: spin up a local torch.distributed group, one
        # process per local GPU.
        args.gpus = torch.cuda.device_count()
        loc_rank = rank % args.gpus
        args.loc_rank = loc_rank
        device = "cuda:" + str(loc_rank)
        port = str(29500)  # + (args.world_size % args.gpus))
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = port  # "29500"
        if args.local_comms == "nccl":
            # NCCL over InfiniBand on this cluster.
            os.environ["NCCL_SOCKET_IFNAME"] = "ib"
        torch.distributed.init_process_group(backend=args.local_comms,
                                             rank=loc_rank,
                                             world_size=args.gpus)
        torch.cuda.set_device(device)
        args.gpu = loc_rank
        args.local_rank = loc_rank
    elif args.gpus == 1:
        # Single GPU: no local process group needed.
        args.gpus = torch.cuda.device_count()
        args.distributed = False
        device = "cuda:0"
        args.local_rank = 0
        torch.cuda.set_device(device)
    torch.cuda.empty_cache()

    args.total_batch_size = args.world_size * args.batch_size
    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    # create model
    if args.pretrained:
        print0("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print0("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if (not args.distributed and hasattr(torch, "channels_last")
            and hasattr(torch, "contiguous_format")):
        if args.channels_last:
            memory_format = torch.channels_last
        else:
            memory_format = torch.contiguous_format
        model = model.to(device, memory_format=memory_format)
    else:
        model = model.to(device)
    # model = tDDP(model) -> done in the ht model initialization
    # Scale learning rate based on global batch size
    # todo: change the learning rate adjustments to be reduce on plateau
    args.lr = (
        0.0125
    )  # (1. / args.world_size * (5 * (args.world_size - 1) / 6.)) * 0.0125 * args.world_size
    # args.lr = (1. / args.world_size * (5 * (args.world_size - 1) / 6.)) * 0.0125 * args.world_size
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # create DP optimizer and model:
    daso_optimizer = ht.optim.DASO(
        local_optimizer=optimizer,
        total_epochs=args.epochs,
        max_global_skips=4,
        stability_level=0.05,
    )
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.5,
                                                           patience=5,
                                                           threshold=0.05,
                                                           min_lr=1e-4)
    htmodel = ht.nn.DataParallelMultiGPU(model, daso_optimizer)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(device)

    # Optionally resume from a checkpoint
    if args.resume:
        # Use a local scope to avoid dangling references
        def resume():
            if os.path.isfile(args.resume):
                print0("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(
                    args.resume,
                    map_location=lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint["epoch"]
                # best_prec1 = checkpoint["best_prec1"]
                htmodel.load_state_dict(checkpoint["state_dict"])
                optimizer.load_state_dict(checkpoint["optimizer"])
                ce = checkpoint["epoch"]
                print0(f"=> loaded checkpoint '{args.resume}' (epoch {ce})")
            else:
                # Fall back to the default world-size-stamped checkpoint name.
                try:
                    resfile = "imgnet-checkpoint-" + str(
                        args.world_size) + ".pth.tar"
                    print0("=> loading checkpoint '{}'".format(resfile))
                    checkpoint = torch.load(resfile,
                                            map_location=lambda storage, loc:
                                            storage.cuda(args.gpu))
                    args.start_epoch = checkpoint["epoch"]
                    # best_prec1 = checkpoint["best_prec1"]
                    htmodel.load_state_dict(checkpoint["state_dict"])
                    optimizer.load_state_dict(checkpoint["optimizer"])
                    ce = checkpoint["epoch"]
                    print0(f"=> loaded checkpoint '{resfile}' (epoch {ce})")
                except FileNotFoundError:
                    print0(f"=> no checkpoint found at '{args.resume}'")

        resume()

    # if args.benchmarking:
    # import pandas as pd
    # Benchmark bookkeeping: one results dict per node count.
    nodes = str(int(daso_optimizer.comm.size / torch.cuda.device_count()))
    cwd = os.getcwd()
    fname = cwd + "/" + nodes + "imagenet-benchmark"
    if args.resume and rank == 0 and os.path.isfile(fname + ".pkl"):
        with open(fname + ".pkl", "rb") as f:
            out_dict = pickle.load(f)
        # Older runs keyed results with a float node count -- migrate keys.
        nodes2 = str(daso_optimizer.comm.size / torch.cuda.device_count())
        old_keys = [
            nodes2 + "-avg-batch-time",
            nodes2 + "-total-train-time",
            nodes2 + "-train-top1",
            nodes2 + "-train-top5",
            nodes2 + "-train-loss",
            nodes2 + "-val-acc1",
            nodes2 + "-val-acc5",
        ]
        new_keys = [
            nodes + "-avg-batch-time",
            nodes + "-total-train-time",
            nodes + "-train-top1",
            nodes + "-train-top5",
            nodes + "-train-loss",
            nodes + "-val-acc1",
            nodes + "-val-acc5",
        ]
        for k in range(len(old_keys)):
            if old_keys[k] in out_dict.keys():
                out_dict[new_keys[k]] = out_dict[old_keys[k]]
                del out_dict[old_keys[k]]
    else:
        out_dict = {
            "epochs": [],
            nodes + "-avg-batch-time": [],
            nodes + "-total-train-time": [],
            nodes + "-train-top1": [],
            nodes + "-train-top5": [],
            nodes + "-train-loss": [],
            nodes + "-val-acc1": [],
            nodes + "-val-acc5": [],
        }
    print0("Output dict:", fname)

    if args.arch == "inception_v3":
        raise RuntimeError(
            "Currently, inception_v3 is not supported by this example.")
        # crop_size = 299
        # val_size = 320  # I chose this value arbitrarily, we can adjust.
    else:
        crop_size = 224  # should this be 256?
        val_size = 256

    # DALI input pipelines: one for training ...
    pipe = HybridPipe(
        batch_size=args.batch_size,
        num_threads=args.workers,
        device_id=args.loc_rank if not args.manual_dist else 0,
        data_dir=args.train,
        label_dir=args.train_indexes,
        crop=crop_size,
        dali_cpu=args.dali_cpu,
        training=True,
    )
    pipe.build()
    train_loader = DALIClassificationIterator(pipe, reader_name="Reader",
                                              last_batch_policy=False)

    # ... and one for validation.
    pipe = HybridPipe(
        batch_size=args.batch_size,
        num_threads=args.workers,
        device_id=args.loc_rank if not args.manual_dist else 0,
        data_dir=args.validate,
        label_dir=args.validate_indexes,
        crop=val_size,
        dali_cpu=args.dali_cpu,
        training=False,
    )
    pipe.build()
    val_loader = DALIClassificationIterator(pipe, reader_name="Reader",
                                            last_batch_policy=False)

    if args.evaluate:
        # Evaluation-only mode: one validation pass, then exit.
        validate(device, val_loader, htmodel, criterion)
        return

    model.epochs = args.start_epoch
    args.factor = 0
    total_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        avg_train_time, tacc1, tacc5, ls, train_time = train(
            device, train_loader, htmodel, criterion, daso_optimizer, epoch)
        total_time.update(avg_train_time)
        if args.test:
            break

        # evaluate on validation set
        [prec1, prec5] = validate(device, val_loader, htmodel, criterion)

        # epoch loss logic to adjust learning rate based on loss
        daso_optimizer.epoch_loss_logic(ls)
        # avg_loss.append(ls)
        print0(
            "scheduler stuff",
            ls,
            scheduler.best * (1.0 - scheduler.threshold),
            scheduler.num_bad_epochs,
        )
        scheduler.step(ls)
        print0("next lr:", daso_optimizer.local_optimizer.param_groups[0]["lr"])

        # remember best prec@1 and save checkpoint
        if args.rank == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            # if epoch in [30, 60, 80]:
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "arch": args.arch,
                    "state_dict": htmodel.state_dict(),
                    "best_prec1": best_prec1,
                    "optimizer": optimizer.state_dict(),
                },
                is_best=is_best,
            )
            if epoch == args.epochs - 1:
                print0("##Top-1 {0}\n"
                       "##Top-5 {1}\n"
                       "##Perf {2}".format(
                           prec1, prec5,
                           args.total_batch_size / total_time.avg))
            out_dict["epochs"].append(epoch)
            out_dict[nodes + "-avg-batch-time"].append(avg_train_time)
            out_dict[nodes + "-total-train-time"].append(train_time)
            out_dict[nodes + "-train-top1"].append(tacc1)
            out_dict[nodes + "-train-top5"].append(tacc5)
            out_dict[nodes + "-train-loss"].append(ls)
            out_dict[nodes + "-val-acc1"].append(prec1)
            out_dict[nodes + "-val-acc5"].append(prec5)
            # save the dict to pick up after the checkpoint
            save_obj(out_dict, fname)
        # DALI iterators must be reset before the next epoch.
        train_loader.reset()
        val_loader.reset()

    if args.rank == 0:
        print("\nRESULTS\n")
        df = pd.DataFrame.from_dict(out_dict)
        with pd.option_context("display.max_rows", None, "display.max_columns",
                               None):  # more options can be specified also
            print(df)
        if args.benchmarking:
            # Append this run's results to the cumulative benchmark CSV.
            try:
                fulldf = pd.read_csv(cwd + "/bench-results.csv")
                fulldf = pd.concat([df, fulldf], axis=1)
            except FileNotFoundError:
                fulldf = df
            fulldf.to_csv(cwd + "/bench-results.csv")
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved from typing import List import fvcore.nn.weight_init as weight_init import torch from torch import nn from torch.nn import functional as F from detectron2.config import configurable from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm from detectron2.structures import Instances from detectron2.utils.events import get_event_storage from detectron2.utils.registry import Registry torch.set_printoptions(sci_mode=False) __all__ = [ "BaseMaskRCNNHead", "MaskRCNNConvUpsampleHead", "build_mask_head", "ROI_MASK_HEAD_REGISTRY", ] ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") ROI_MASK_HEAD_REGISTRY.__doc__ = """ Registry for mask heads, which predicts instance masks given per-region features. The registered object will be called with `obj(cfg, input_shape)`. """ def mask_rcnn_loss(pred_mask_logits, instances, vis_period=0):
# Flat analysis script: walks saved per-shape / per-scene position tensors
# and converts object x-coordinates into integer grid indices.
# NOTE(review): truncated in this view (cut mid-loop); nesting of the final
# index statements was inferred -- confirm against the original file.
from int_phy_recollect_position import SHAPE_TYPES, SCENE_TYPES, DATA_SAVE_DIR, ON_GROUND_THRESHOLD
import os
import torch

torch.set_printoptions(profile="full", precision=2, linewidth=10000)

n = 0
for t in SHAPE_TYPES:
    # Per-shape directories for the three recording conditions.
    shape_dir_no_occluder = os.path.join(DATA_SAVE_DIR, "without_occluder", t)
    shape_dir_occluder = os.path.join(DATA_SAVE_DIR, "with_occluder", t)
    ground_dir = os.path.join(DATA_SAVE_DIR, "ground", t)
    for scene_dir in SCENE_TYPES:
        scene_dir_no_occluder = os.path.join(shape_dir_no_occluder, scene_dir)
        scene_dir_occluder = os.path.join(shape_dir_occluder, scene_dir)
        tensor_files = os.listdir(scene_dir_no_occluder)
        for one_file in sorted(tensor_files):
            no_occluder_file = os.path.join(scene_dir_no_occluder, one_file)
            print(no_occluder_file)
            tensor_no_occluder = torch.load(no_occluder_file)
            if scene_dir == "gravity":
                # Gravity scenes carry a matching ground feature; repeat it
                # 60x along dim 1 -- presumably one entry per frame so it
                # aligns with the object tensor (TODO confirm).
                gravity_dir = os.path.join(ground_dir, scene_dir)
                ground_feature = torch.load(os.path.join(gravity_dir, one_file)).unsqueeze(1).repeat_interleave(60, dim=1)
                assert ground_feature.size()[0] == tensor_no_occluder.size()[0]
            # Map x coordinate to an integer index (scale by 100, shift 650).
            object_x_to_idx = torch.round(tensor_no_occluder[:, :, [0]].repeat_interleave(5, dim=2) * 100 + 650).long()
            object_x_to_idx[:, :, 0] -= 100  # 100 index = 1 meter
            object_x_to_idx[:, :, 1] -= 50
# Preamble of the IGVC line-segmentation training script: imports, global
# print options, and the CLI argument definitions.
# NOTE(review): truncated in this view -- the final add_argument call is cut
# off mid-statement.
import numpy as np
import os
import pdb
import torch
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
from torchvision import transforms

from IGVCDataset import IGVCDataset

import models.model
import utils

# NOTE(review): modern numpy rejects threshold=np.nan (raises ValueError);
# sys.maxsize is the supported way to disable summarization -- confirm the
# pinned numpy version before changing.
np.set_printoptions(threshold=np.nan)
torch.set_printoptions(precision=10)

# Training settings.
# NOTE(review): `argparse` is not imported in this chunk -- presumably
# imported earlier in the file; verify.
parser = argparse.ArgumentParser(description='IGVC segmentation of lines.')
# Hyperparameters.
parser.add_argument('--batch_size', type=int, default=1,
                    help='input batch size for training.')
parser.add_argument('--epochs', type=int, default=5,
                    help='number of epochs to train')
parser.add_argument('--im_size', type=int, nargs=3, default=[3,400,400],
                    help='image dimensions for training.')
parser.add_argument('--kernel_size', type=int, default=3,
                    help='size of convolution kernels/filters.')
parser.add_argument('--lr', type=float, default=1e-3,
def main():
    """Entry point for the ExtComAbs trainer.

    Parses CLI options, sets up file logging, builds the vocabulary and
    embedding layer, constructs train/validation data loaders, then
    dispatches to either generator training (`--model gen_doc`) or joint
    generator+classifier training.

    Relies on module-level `logger` and `formatter` (defined elsewhere in
    this file) and on project classes `Vocab`, `Word_Embedding`,
    `ExampleSet`, `GengrateDocument`, `Classfiler`.

    BUG FIX: the original assigned `embed.weight.requires_0grad` -- a typo
    that silently created a meaningless attribute, so `--embed_train` never
    actually froze/unfroze the embedding. Corrected to `requires_grad`.
    """
    parser = argparse.ArgumentParser(description='ExtComAbs Model')

    # Where to find data
    parser.add_argument('--data_dir', type=str, default='datasets/cnndm', help='The dataset directory.')
    parser.add_argument('--cache_dir', type=str, default='cache/cnndm', help='The processed dataset directory')
    parser.add_argument('--embedding_path', type=str, default='./Glove/glove.42B.300d.txt',
                        help='Path expression to external word embedding.')
    parser.add_argument('--model', type=str, default='class', help='generate document or classfiler[gen_doc or class]')
    parser.add_argument('--restore_model', type=str, default='None',
                        help='Restore model for further training. [bestmodel/bestFmodel/earlystop/None]')

    # Where to save output
    parser.add_argument('--save_root', type=str, default='save/', help='Root directory for all model.')
    parser.add_argument('--log_root', type=str, default='log/', help='Root directory for all logging.')

    # Hyperparameters
    parser.add_argument('--gpu', type=str, default='0', help='GPU ID to use. [default: 0]')
    parser.add_argument('--cuda', action='store_true', default=False, help='GPU or CPU [default: False]')
    parser.add_argument('--vocab_size', type=int, default=10, help='Size of vocabulary. [default: 50000]')
    parser.add_argument('--n_epochs', type=int, default=3, help='Number of epochs [default: 20]')
    parser.add_argument('--batch_size', type=int, default=32, help='Mini batch size [default: 32]')
    parser.add_argument('--word_embedding', action='store_true', default=True,
                        help='whether to use Word embedding [default: True]')
    parser.add_argument('--word_emb_dim', type=int, default=4,
                        help='Word embedding size [default: 256],Glove dim is 300')
    parser.add_argument('--embed_train', action='store_true', default=False,
                        help='whether to train Word embedding [default: False]')
    parser.add_argument('--n_head', type=int, default=1, help='multihead attention number [default: 8]')
    parser.add_argument('--sent_max_len', type=int, default=5,
                        help='max length of sentences (max source text sentence tokens)[default:100]')
    parser.add_argument('--doc_max_timesteps', type=int, default=3,
                        help='max length of documents (max timesteps of documents)[default:50]')

    # Training
    parser.add_argument('--lr', type=float, default=0.01, help='learning rate')
    parser.add_argument('--lr_descent', action='store_true', default=True, help='learning rate descent')
    parser.add_argument('--grad_clip', action='store_true', default=True, help='for gradient clipping')
    parser.add_argument('--max_grad_norm', type=float, default=1.0,
                        help='for gradient clipping max gradient normalization')
    parser.add_argument('-m', type=int, default=3, help='decode summary length')

    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.set_printoptions(threshold=50000)

    # File paths
    DATA_FILE = os.path.join(args.data_dir, "train.label.jsonl2")
    VALID_FILE = os.path.join(args.data_dir, "val.label.jsonl2")
    VOCAL_FILE = os.path.join(args.cache_dir, "vocab")
    LOG_PATH = args.log_root

    # train_log setting: one timestamped log file per run.
    if not os.path.exists(LOG_PATH):
        os.makedirs(LOG_PATH)
    now_time = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    log_path = os.path.join(LOG_PATH, "train_" + now_time)
    file_handler = logging.FileHandler(log_path)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    logger.info("Pytorch %s", torch.__version__)
    logger.info("[INFO] Create Vocab, vocab path is %s", VOCAL_FILE)
    # Build the vocabulary and the embedding layer (index 0 = padding).
    vocab = Vocab(VOCAL_FILE, args.vocab_size)
    embed = torch.nn.Embedding(vocab.size(), args.word_emb_dim, padding_idx=0)

    # Load pretrained embedding weights (currently disabled -- was
    # `if args.word_embedding:` before being hard-switched off).
    # if args.word_embedding:
    if False:
        embed_loader = Word_Embedding(args.embedding_path, vocab)
        vectors = embed_loader.load_my_vecs(args.word_emb_dim)
        pretrained_weight = embed_loader.add_unknown_words_by_avg(vectors, args.word_emb_dim)
        # Copy the pretrained weights into the embedding layer.
        embed.weight.data.copy_(torch.Tensor(pretrained_weight))
    # Whether to fine-tune the embedding during training.
    # BUG FIX: was `embed.weight.requires_0grad = ...` (typo, no effect).
    embed.weight.requires_grad = args.embed_train

    logger.info(args)

    # Train loader first; free the train dataset object once wrapped.
    dataset = ExampleSet(DATA_FILE, vocab, args.doc_max_timesteps, args.sent_max_len)
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=1)
    del dataset
    valid_dataset = ExampleSet(VALID_FILE, vocab, args.doc_max_timesteps, args.sent_max_len)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=args.batch_size,
                                               shuffle=False, num_workers=1)

    if args.model == "gen_doc":
        # Generator-only training.
        model = GengrateDocument(args, embed)
        logger.info("[MODEL] GengrateDocument ")
        if args.cuda:
            model.to(torch.device("cuda"))
            logger.info("[INFO] Use cuda")
        setup_training(model, train_loader, valid_loader, valid_dataset, args, vocab)
    else:
        # Joint generator + classifier training.
        model_gen = GengrateDocument(args, embed)
        model_class = Classfiler(args, embed)
        logger.info("[MODEL] Classfiler ")
        if args.cuda:
            model_gen.to(torch.device("cuda"))
            model_class.to(torch.device("cuda"))
            logger.info("[INFO] Use cuda")
        run_training_class(model_gen, model_class, train_loader, valid_loader, valid_dataset, args, vocab)