Example #1
    def __init__(self, args):
        super(ArcI, self).__init__()

        # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.01)

        self.embedding = nn.Embedding.from_pretrained(
            load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)

        # three conv + max-pool blocks per side, followed by two MLP layers
        self.conv_left = nn.ModuleList([
            self.make_conv1d_pool_block(128, 128, 3, nn.ReLU(), 2),
            self.make_conv1d_pool_block(128, 128, 5, nn.ReLU(), 2),
            self.make_conv1d_pool_block(128, 128, 7, nn.ReLU(), 2)
        ])
        self.conv_right = nn.ModuleList([
            self.make_conv1d_pool_block(128, 128, 3, nn.ReLU(), 2),
            self.make_conv1d_pool_block(128, 128, 5, nn.ReLU(), 2),
            self.make_conv1d_pool_block(128, 128, 7, nn.ReLU(), 2)
        ])

        self.dropout = nn.Dropout(p=0.5)
        self.mlp1 = nn.Sequential(nn.Linear(128 * 12, 128), nn.ReLU())
        self.mlp2 = nn.Sequential(nn.Linear(128, 64), nn.ReLU())

        self.predict = nn.Linear(64, 2)
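
`make_conv1d_pool_block` is called above but not defined in this snippet. A minimal sketch of what such a helper might look like, assuming each block pads so the convolution preserves length, convolves, applies the given activation, and then max-pools:

    def make_conv1d_pool_block(self, in_channels, out_channels, kernel_size,
                               activation, pool_size):
        # Hypothetical helper: pad right so the convolution keeps the
        # sequence length, then convolve, activate, and downsample.
        return nn.Sequential(
            nn.ConstantPad1d((0, kernel_size - 1), 0),
            nn.Conv1d(in_channels, out_channels, kernel_size),
            activation,
            nn.MaxPool1d(kernel_size=pool_size))
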
Example #2
    def __init__(self, args):
        super(ThreeLayers, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(
            load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
        self.fact_convs = nn.ModuleList([
            nn.Conv1d(args.embedding_dim, args.filters_num,
                      args.kernel_size_1),
            nn.Conv1d(args.filters_num, args.filters_num, args.kernel_size_2)
        ])
        self.article_convs = nn.ModuleList([
            nn.Conv1d(args.embedding_dim, args.filters_num,
                      args.kernel_size_1),
            nn.Conv1d(args.filters_num, args.filters_num, args.kernel_size_2),
        ])

        self.conv_paddings = nn.ModuleList([
            nn.ConstantPad1d(
                (args.kernel_size_1 // 2 - 1, args.kernel_size_1 // 2), 0.),
            nn.ConstantPad1d(
                (args.kernel_size_2 // 2 - 1, args.kernel_size_2 // 2), 0.)
        ])

        self.ffs = nn.ModuleList(
            [nn.Linear(args.embedding_dim, args.linear_output)] + [
                nn.Linear(args.filters_num, args.linear_output)
                for _ in range(2)
            ] + [nn.Linear(args.article_len * 3, args.linear_output)])

        self.predict = nn.Linear(args.linear_output, 2)
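
The asymmetric `(k // 2 - 1, k // 2)` padding makes each convolution length-preserving when the kernel size k is even: the output length is L + (k // 2 - 1) + k // 2 - (k - 1) = L. A quick standalone check, using a hypothetical even kernel size of 4:

    import torch
    import torch.nn as nn

    k = 4                                    # hypothetical even kernel size
    pad = nn.ConstantPad1d((k // 2 - 1, k // 2), 0.)
    conv = nn.Conv1d(8, 8, k)                # toy channel counts
    x = torch.randn(1, 8, 30)                # (batch, channels, length)
    assert conv(pad(x)).shape[-1] == 30      # length preserved
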
Example #3
    def __init__(self, corpus):
        stop_words = [
            line.strip()
            for line in load_txt(args.data_dir + args.stopwords_name)
        ]
        vocab = load_pkl(args.data_dir + args.vocab_name)
        articles = [
            item[1].split(':')[1] for item in [
                line.strip().split('|')
                for line in load_txt(args.data_dir + args.crime + '/' +
                                     args.law_article_content_name)
            ]
        ]
        articles = [[
            vocab[word] if word in vocab else vocab['<UNK>']
            for word in article if word not in stop_words
        ] for article in articles]
        # Sort articles longest-first and record where each original index
        # lands; enumerate avoids the wrong mappings that list.index would
        # produce if two articles tokenized identically.
        order = sorted(range(len(articles)),
                       key=lambda i: len(articles[i]),
                       reverse=True)
        sorted_articles = [articles[i] for i in order]
        idx_reflect = {orig: pos for pos, orig in enumerate(order)}
        sorted_articles = [
            torch.tensor(article) for article in sorted_articles
        ]
        self.articles_lens = [len(article) for article in sorted_articles]
        # Pad the variable-length articles into one (num_articles, max_len) tensor.
        self.articles = torch.nn.utils.rnn.pad_sequence(sorted_articles,
                                                        batch_first=True)

        self.corpus = []
        self.labels = []
        self.max_fact_len = 0

        for data in corpus:
            label = data['decision']
            if label is None:
                continue

            tokenized_data = []

            for sen in data['sentences']:
                fact = [
                    vocab[word] if word in vocab else vocab['<UNK>']
                    for word in jieba.lcut(sen) if word not in stop_words
                ]
                if len(fact) == 0:
                    continue
                self.max_fact_len = max(self.max_fact_len, len(fact))
                article = [idx_reflect[idx] for idx in data['sentences'][sen]]
                if len(article) == 0:
                    continue
                tokenized_data.append((fact, article))
            if len(tokenized_data) == 0:
                continue
            self.corpus.append(tokenized_data)
            self.labels.append(label_reflect(label))
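
Sorting the articles longest-first (and keeping `idx_reflect` to translate original indices to sorted positions) is the usual preparation for `pack_padded_sequence`, which expects descending lengths unless `enforce_sorted=False` is passed. A hedged sketch of how the padded batch built above might be consumed, with `embedding` and `dataset` standing in for whatever the surrounding model actually uses:

    from torch.nn.utils.rnn import pack_padded_sequence

    # Hypothetical downstream use of the padded article batch.
    embedded = embedding(dataset.articles)       # (num_articles, max_len, dim)
    packed = pack_padded_sequence(embedded,
                                  dataset.articles_lens,
                                  batch_first=True)  # lengths are descending
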
Example #4
    def __init__(self, arg):
        super(DecisionPredictor, self).__init__()
        self.fine_grained = arg.fine_grained_penalty_predictor
        # Note: args (used below) is the module-level config, as in the
        # dataset example; arg is this constructor's parameter.
        self.embedding = nn.Embedding.from_pretrained(
            load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
        self.fact_encoder = FactEncoder(arg)
        self.article_encoder = ArticleEncoder(arg)

        self.interaction = nn.Sequential(nn.Dropout(p=0.3),
                                         nn.Linear(arg.hidden_size * 4, 64))
        self.predict = nn.Sequential(nn.Dropout(p=0.3),
                                     nn.Linear(64, arg.num_decisions))
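
No `forward` is shown here; the `arg.hidden_size * 4` input width suggests the fact and article representations (each presumably `hidden_size * 2` wide, as from a bidirectional encoder) are concatenated before the interaction layer. A speculative sketch under that assumption:

    def forward(self, fact, article):
        # Hypothetical forward pass: encode both sides, concatenate, score.
        fact_repr = self.fact_encoder(self.embedding(fact))           # (B, 2h)
        article_repr = self.article_encoder(self.embedding(article))  # (B, 2h)
        merged = torch.cat([fact_repr, article_repr], dim=-1)         # (B, 4h)
        return self.predict(self.interaction(merged))
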
Example #5
    def __init__(self, args):
        super(MVLSTM, self).__init__()

        # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.01)

        self.embedding = nn.Embedding.from_pretrained(
            load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
        self.left_lstm = nn.LSTM(128,
                                 128,
                                 num_layers=1,
                                 batch_first=True,
                                 bidirectional=True)
        self.right_lstm = nn.LSTM(128,
                                  128,
                                  num_layers=1,
                                  batch_first=True,
                                  bidirectional=True)
        self.dropout = nn.Dropout(p=0.2)
        self.mlp = nn.Linear(64, 32)
        self.predict = nn.Linear(32, 2)
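
The matching step is not shown; in the MV-LSTM design the two LSTM outputs are compared position-by-position and only the strongest interaction values are kept. A sketch assuming the 64-unit `mlp` input corresponds to a top-k with k = 64, and that the embedding dimension is 128 to match the LSTM input size:

    def forward(self, left, right):
        # Hypothetical matching: interaction matrix between both sequences,
        # keeping the 64 strongest signals to match the mlp input width.
        left_out, _ = self.left_lstm(self.embedding(left))       # (B, L1, 256)
        right_out, _ = self.right_lstm(self.embedding(right))    # (B, L2, 256)
        interaction = torch.bmm(left_out, right_out.transpose(1, 2))
        top_k = interaction.flatten(start_dim=1).topk(64, dim=-1).values
        return self.predict(self.mlp(self.dropout(top_k)))
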
Example #6
    def __init__(self, args):
        super(MatchPyramid, self).__init__()

        # optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=0.1)

        self.embedding = nn.Embedding.from_pretrained(
            load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)

        # two convolutional layers
        # two pooling layers (a max pool, plus an adaptive average pool that
        # maps variable-length inputs to a fixed output size)
        # two fully connected layers
        self.conv_1 = nn.Sequential(
            nn.ConstantPad2d((0, 5 - 1, 0, 5 - 1), 0),
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(5, 5)),
            nn.ReLU(), nn.MaxPool2d(kernel_size=(2, 2)))
        self.conv_2 = nn.Sequential(
            nn.ConstantPad2d((0, 3 - 1, 0, 3 - 1), 0),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3)),
            nn.ReLU(), nn.AdaptiveAvgPool2d((5, 10)))
        self.dropout = nn.Dropout(p=0.2)
        self.mlp = nn.Linear(128 * 5 * 10, 128)
        self.predict = nn.Linear(128, 2)
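
MatchPyramid treats the word-by-word similarity matrix as a one-channel image; the adaptive pool then maps any input size onto the fixed 5 x 10 grid the MLP expects. A sketch of the corresponding forward pass, assuming a plain dot-product matching matrix:

    def forward(self, left, right):
        # Hypothetical forward pass over the similarity "image".
        left_emb = self.embedding(left)                     # (B, L1, D)
        right_emb = self.embedding(right)                   # (B, L2, D)
        matching = torch.bmm(left_emb, right_emb.transpose(1, 2))  # (B, L1, L2)
        features = self.conv_2(self.conv_1(matching.unsqueeze(1)))
        flat = self.dropout(features.flatten(start_dim=1))  # (B, 128 * 5 * 10)
        return self.predict(self.mlp(flat))
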
Example #7
    def __init__(self, args):
        super(ArcII, self).__init__()

        # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.1)

        self.embedding = nn.Embedding.from_pretrained(
            load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
        # three for convolution, three for pooling, and two for MLP
        self.conv1d_left = nn.Sequential(
            nn.ConstantPad1d((0, 3 - 1), 0),
            nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3))
        self.conv1d_right = nn.Sequential(
            nn.ConstantPad1d((0, 3 - 1), 0),
            nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3))
        self.layer2_activation = nn.ReLU()
        self.layer2_pooling = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2ds = nn.ModuleList([
            self.make_conv2d_pool_block(128, 128, (3, 3), nn.ReLU(), (2, 2)),
            self.make_conv2d_pool_block(128, 128, (5, 5), nn.ReLU(), (2, 2))
        ])
        self.dropout = nn.Dropout(p=0.2)
        self.mlp = nn.Sequential(nn.Linear(128 * 6 * 6, 128), nn.ReLU())
        self.predict = nn.Linear(128, 2)
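
As in Example #1, the block-builder helper is referenced but not shown. A minimal sketch of `make_conv2d_pool_block`, assuming it mirrors the 1D padding convention used above (pad right/bottom by kernel - 1, then convolve, activate, and pool):

    def make_conv2d_pool_block(self, in_channels, out_channels, kernel_size,
                               activation, pool_size):
        # Hypothetical helper: (left, right, top, bottom) padding keeps the
        # spatial size through the convolution; pooling then downsamples.
        return nn.Sequential(
            nn.ConstantPad2d((0, kernel_size[1] - 1, 0, kernel_size[0] - 1), 0),
            nn.Conv2d(in_channels, out_channels, kernel_size),
            activation,
            nn.MaxPool2d(kernel_size=pool_size))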