def __init__(self, args):
    super(ArcI, self).__init__()
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.01)
    self.embedding = nn.Embedding.from_pretrained(
        load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
    # three Conv1d + max-pooling blocks per side, followed by two MLP layers
    self.conv_left = nn.ModuleList([
        self.make_conv1d_pool_block(128, 128, 3, nn.ReLU(), 2),
        self.make_conv1d_pool_block(128, 128, 5, nn.ReLU(), 2),
        self.make_conv1d_pool_block(128, 128, 7, nn.ReLU(), 2)
    ])
    self.conv_right = nn.ModuleList([
        self.make_conv1d_pool_block(128, 128, 3, nn.ReLU(), 2),
        self.make_conv1d_pool_block(128, 128, 5, nn.ReLU(), 2),
        self.make_conv1d_pool_block(128, 128, 7, nn.ReLU(), 2)
    ])
    self.dropout = nn.Dropout(p=0.5)
    self.mlp1 = nn.Sequential(nn.Linear(128 * 12, 128), nn.ReLU())
    self.mlp2 = nn.Sequential(nn.Linear(128, 64), nn.ReLU())
    self.predict = nn.Linear(64, 2)
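# make_conv1d_pool_block is called above but not shown in this section. A minimal
# sketch, assuming the same right-padding style the ArcII / MatchPyramid blocks use
# (so the Conv1d preserves sequence length), followed by the given activation and a
# MaxPool1d; the real helper in the repository may differ.
def make_conv1d_pool_block(self, in_channels, out_channels, kernel_size,
                           activation, pool_size):
    return nn.Sequential(
        nn.ConstantPad1d((0, kernel_size - 1), 0),
        nn.Conv1d(in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=kernel_size),
        activation,
        nn.MaxPool1d(kernel_size=pool_size))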
def __init__(self, args):
    super(ThreeLayers, self).__init__()
    self.embedding = nn.Embedding.from_pretrained(
        load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
    self.fact_convs = nn.ModuleList([
        nn.Conv1d(args.embedding_dim, args.filters_num, args.kernel_size_1),
        nn.Conv1d(args.filters_num, args.filters_num, args.kernel_size_2)
    ])
    self.article_convs = nn.ModuleList([
        nn.Conv1d(args.embedding_dim, args.filters_num, args.kernel_size_1),
        nn.Conv1d(args.filters_num, args.filters_num, args.kernel_size_2)
    ])
    # (k // 2 - 1, k // 2) pads k - 1 positions in total, so each Conv1d keeps the
    # sequence length when the kernel size is even
    self.conv_paddings = nn.ModuleList([
        nn.ConstantPad1d(
            (args.kernel_size_1 // 2 - 1, args.kernel_size_1 // 2), 0.),
        nn.ConstantPad1d(
            (args.kernel_size_2 // 2 - 1, args.kernel_size_2 // 2), 0.)
    ])
    self.ffs = nn.ModuleList(
        [nn.Linear(args.embedding_dim, args.linear_output)] +
        [nn.Linear(args.filters_num, args.linear_output) for _ in range(2)] +
        [nn.Linear(args.article_len * 3, args.linear_output)])
    self.predict = nn.Linear(args.linear_output, 2)
def __init__(self, corpus):
    stop_words = [
        line.strip()
        for line in load_txt(args.data_dir + args.stopwords_name)
    ]
    vocab = load_pkl(args.data_dir + args.vocab_name)
    # each line looks like "...|<name>:<content>|..."; keep only the content field
    articles = [
        item[1].split(':')[1] for item in [
            line.strip().split('|') for line in load_txt(
                args.data_dir + args.crime + '/' +
                args.law_article_content_name)
        ]
    ]
    # map article tokens to vocab ids, dropping stop words and using <UNK> for OOV
    articles = [[
        vocab[word] if word in vocab else vocab['<UNK>']
        for word in article if word not in stop_words
    ] for article in articles]
    # sort articles by length (longest first) and remember where each original index moved
    sorted_articles = sorted(articles, key=lambda x: len(x), reverse=True)
    idx_reflect = {
        articles.index(article): sorted_articles.index(article)
        for article in articles
    }
    sorted_articles = [torch.tensor(article) for article in sorted_articles]
    self.articles_lens = [len(article) for article in sorted_articles]
    self.articles = torch.nn.utils.rnn.pad_sequence(sorted_articles,
                                                    batch_first=True)
    self.corpus = []
    self.labels = []
    self.max_fact_len = 0
    for data in corpus:
        label = data['decision']
        if label is None:
            continue
        tokenized_data = []
        for sen in data['sentences']:
            # tokenize the fact sentence with jieba, drop stop words, map to vocab ids
            fact = [
                vocab[word] if word in vocab else vocab['<UNK>']
                for word in jieba.lcut(sen) if word not in stop_words
            ]
            if len(fact) == 0:
                continue
            self.max_fact_len = max(self.max_fact_len, len(fact))
            # remap the cited article indices to their positions after sorting
            article = [idx_reflect[idx] for idx in data['sentences'][sen]]
            if len(article) == 0:
                continue
            tokenized_data.append((fact, article))
        if len(tokenized_data) == 0:
            continue
        self.corpus.append(tokenized_data)
        self.labels.append(label_reflect(label))
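# The constructor above only builds self.corpus / self.labels. A minimal accessor
# sketch for the Dataset interface, assuming padding and batching of the
# (fact, article) pairs are handled later by a collate_fn; this is not necessarily
# how the repository implements it.
def __len__(self):
    return len(self.labels)

def __getitem__(self, idx):
    # one case: a list of (fact token ids, sorted-article indices) pairs plus its label
    return self.corpus[idx], self.labels[idx]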
def __init__(self, arg):
    super(DecisionPredictor, self).__init__()
    self.fine_grained = arg.fine_grained_penalty_predictor
    self.embedding = nn.Embedding.from_pretrained(
        load_pkl(arg.data_dir + arg.embedding_matrix_name), freeze=True)
    self.fact_encoder = FactEncoder(arg)
    self.article_encoder = ArticleEncoder(arg)
    self.interaction = nn.Sequential(nn.Dropout(p=0.3),
                                     nn.Linear(arg.hidden_size * 4, 64))
    self.predict = nn.Sequential(nn.Dropout(p=0.3),
                                 nn.Linear(64, arg.num_decisions))
def __init__(self, args):
    super(MVLSTM, self).__init__()
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.01)
    self.embedding = nn.Embedding.from_pretrained(
        load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
    self.left_lstm = nn.LSTM(128, 128,
                             num_layers=1,
                             batch_first=True,
                             bidirectional=True)
    self.right_lstm = nn.LSTM(128, 128,
                              num_layers=1,
                              batch_first=True,
                              bidirectional=True)
    self.dropout = nn.Dropout(p=0.2)
    self.mlp = nn.Linear(64, 32)
    self.predict = nn.Linear(32, 2)
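# A forward sketch in the MV-LSTM style, under assumptions (it is not the
# repository's forward): dot-product interactions between the two BiLSTM outputs,
# the top 64 interaction values kept as matching features (64 matches self.mlp's
# input size), then the small MLP head.
def forward(self, left, right):
    left_out, _ = self.left_lstm(self.embedding(left))      # (batch, len_left, 256)
    right_out, _ = self.right_lstm(self.embedding(right))   # (batch, len_right, 256)
    interaction = torch.bmm(left_out, right_out.transpose(1, 2))
    # flatten the interaction matrix and keep the strongest 64 matching signals
    top_k, _ = interaction.flatten(start_dim=1).topk(64, dim=-1)
    out = self.dropout(torch.relu(self.mlp(top_k)))
    return self.predict(out)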
def __init__(self, args):
    super(MatchPyramid, self).__init__()
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=0.1)
    self.embedding = nn.Embedding.from_pretrained(
        load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
    # two convolutional layers,
    # two pooling layers (the second is adaptive, so variable-sized inputs map to a fixed 5 x 10 grid),
    # and two fully connected layers
    self.conv_1 = nn.Sequential(
        nn.ConstantPad2d((0, 5 - 1, 0, 5 - 1), 0),
        nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(5, 5)),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=(2, 2)))
    self.conv_2 = nn.Sequential(
        nn.ConstantPad2d((0, 3 - 1, 0, 3 - 1), 0),
        nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3)),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d((5, 10)))
    self.dropout = nn.Dropout(p=0.2)
    self.mlp = nn.Linear(128 * 5 * 10, 128)
    self.predict = nn.Linear(128, 2)
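# A forward sketch for MatchPyramid, under assumptions (not the repository's
# forward): a word-by-word dot-product matching matrix treated as a 1-channel
# image, the two conv blocks above, then the MLP head. AdaptiveAvgPool2d fixes
# the flattened size at 128 * 5 * 10 regardless of the input lengths.
def forward(self, left, right):
    left_emb = self.embedding(left)                          # (batch, len_left, 128)
    right_emb = self.embedding(right)                        # (batch, len_right, 128)
    matching = torch.bmm(left_emb, right_emb.transpose(1, 2)).unsqueeze(1)
    out = self.conv_2(self.conv_1(matching))                 # (batch, 128, 5, 10)
    out = self.dropout(out.flatten(start_dim=1))
    return self.predict(torch.relu(self.mlp(out)))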
def __init__(self, args):
    super(ArcII, self).__init__()
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.1)
    self.embedding = nn.Embedding.from_pretrained(
        load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
    # three for convolution, three for pooling, and two for MLP
    self.conv1d_left = nn.Sequential(
        nn.ConstantPad1d((0, 3 - 1), 0),
        nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3))
    self.conv1d_right = nn.Sequential(
        nn.ConstantPad1d((0, 3 - 1), 0),
        nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3))
    self.layer2_activation = nn.ReLU()
    self.layer2_pooling = nn.MaxPool2d(kernel_size=(2, 2))
    self.conv2ds = nn.ModuleList([
        self.make_conv2d_pool_block(128, 128, (3, 3), nn.ReLU(), (2, 2)),
        self.make_conv2d_pool_block(128, 128, (5, 5), nn.ReLU(), (2, 2))
    ])
    self.dropout = nn.Dropout(p=0.2)
    self.mlp = nn.Sequential(nn.Linear(128 * 6 * 6, 128), nn.ReLU())
    self.predict = nn.Linear(128, 2)
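# make_conv2d_pool_block is called above but not shown in this section. A minimal
# sketch mirroring the right/bottom padding style used by MatchPyramid's conv
# blocks, assuming kernel_size and pool_size are (height, width) tuples; the
# repository's helper may differ.
def make_conv2d_pool_block(self, in_channels, out_channels, kernel_size,
                           activation, pool_size):
    return nn.Sequential(
        nn.ConstantPad2d((0, kernel_size[1] - 1, 0, kernel_size[0] - 1), 0),
        nn.Conv2d(in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=kernel_size),
        activation,
        nn.MaxPool2d(kernel_size=pool_size))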