def __init__(self, config, usegpu):
    super(Pipeline, self).__init__()

    self.task_name = config.get("data", "type_of_label").replace(" ", "").split(",")
    self.features = config.getint("net", "fc1_feature")

    # One CNN encoder per task.
    self.encoder_list = []
    for a in range(len(self.task_name)):
        self.encoder_list.append(CNNEncoder(config, usegpu))
    self.encoder_list = nn.ModuleList(self.encoder_list)

    # Per-task output heads mapping the document embedding to class scores.
    self.out_fc = []
    for a in range(len(self.task_name)):
        self.out_fc.append(nn.Linear(self.features, get_num_classes(self.task_name[a])))
    self.out_fc = nn.ModuleList(self.out_fc)

    # mix_fc[a][b] projects task a's label vector back into the embedding
    # space, so earlier tasks can condition later ones (see forward below).
    self.mix_fc = []
    for a in range(len(self.task_name)):
        mix_fc = []
        for b in range(len(self.task_name)):
            mix_fc.append(nn.Linear(get_num_classes(self.task_name[a]), self.features))
        mix_fc = nn.ModuleList(mix_fc)
        self.mix_fc.append(mix_fc)
    self.mix_fc = nn.ModuleList(self.mix_fc)

    # Per-task transform applied to the encoder output before mixing.
    self.combine_fc = []
    for a in range(len(self.task_name)):
        self.combine_fc.append(nn.Linear(self.features, self.features))
    self.combine_fc = nn.ModuleList(self.combine_fc)

    self.dropout = nn.Dropout(config.getfloat("train", "dropout"))
    # dim must be explicit; class scores live on dim 1 of (batch, classes).
    self.softmax = nn.Softmax(dim=1)
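# The constructor above reads a handful of config keys. A minimal sketch of
# building a compatible config in code -- the key names are taken from the
# constructor, while the values and the task list are assumptions for
# illustration (the task names mirror the analyze_* helpers below):
import configparser

_cfg = configparser.ConfigParser()
_cfg.read_string("""
[data]
type_of_label = crit, law, time
[net]
fc1_feature = 256
[train]
dropout = 0.5
""")
# model = Pipeline(_cfg, usegpu=False)  # requires CNNEncoder / get_num_classes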
def analyze_time(data, config):
    # One-hot vector over time buckets: set the bucket the raw value maps to.
    res = torch.from_numpy(np.zeros(get_num_classes("time")))
    opt = get_time_id(data, config)
    res[opt] = 1
    return res
def analyze_law(data, config):
    # Multi-hot vector over law articles: the first two fields of each
    # entry identify the article in law_dict.
    res = torch.from_numpy(np.zeros(get_num_classes("law")))
    for x in data:
        y = (x[0], x[1])
        if y in law_dict:
            res[law_dict[y]] = 1
    return res
def __init__(self, config, usegpu):
    super(NNFactArt, self).__init__()

    self.data_size = config.getint("data", "vec_size")
    self.hidden_dim = config.getint("net", "hidden_size")
    self.top_k = config.getint("data", "top_k")

    # Fixed (untrained) attention query vectors, tiled over the batch for
    # sentence-level attention and over batch * sentence_num for word-level
    # attention. A random initialization is left commented out.
    self.ufs = torch.ones(1, self.hidden_dim)
    # self.ufs = torch.randn(1, self.hidden_dim)
    self.ufs = torch.cat([self.ufs for i in range(config.getint("data", "batch_size"))], dim=0)
    self.ufw = torch.ones(1, self.hidden_dim)
    # self.ufw = torch.randn(1, self.hidden_dim)
    self.ufw = torch.cat(
        [self.ufw for i in range(config.getint("data", "batch_size") * config.getint("data", "sentence_num"))],
        dim=0)
    # Variable is a legacy no-op wrapper in modern PyTorch; kept for
    # compatibility with the rest of the codebase.
    if usegpu:
        self.ufs = torch.autograd.Variable(self.ufs).cuda()
        self.ufw = torch.autograd.Variable(self.ufw).cuda()
    else:
        self.ufs = torch.autograd.Variable(self.ufs)
        self.ufw = torch.autograd.Variable(self.ufw)

    # Fact branch: word-level GRU over sentences, then sentence-level GRU
    # over the document.
    self.gru_sentence_f = nn.GRU(self.data_size, self.hidden_dim, batch_first=True)
    self.gru_document_f = nn.GRU(self.hidden_dim, self.hidden_dim, batch_first=True)

    # Article branch: one sentence/document GRU pair per retrieved article.
    self.gru_sentence_a = []
    self.gru_document_a = []
    for i in range(self.top_k):
        self.gru_sentence_a.append(nn.GRU(self.data_size, self.hidden_dim, batch_first=True))
        self.gru_document_a.append(nn.GRU(self.hidden_dim, self.hidden_dim, batch_first=True))

    self.attentions_f = AttentionTanH(config)
    self.attentionw_f = AttentionTanH(config)
    self.attentions_a = []
    self.attentionw_a = []
    for i in range(self.top_k):
        self.attentions_a.append(AttentionTanH(config))
        self.attentionw_a.append(AttentionTanH(config))
    self.attention_a = AttentionTanH(config)

    task_name = config.get("data", "type_of_label").replace(" ", "").split(",")[0]
    self.outfc = nn.Linear(150, get_num_classes(task_name))
    self.midfc1 = nn.Linear(self.hidden_dim * 2, 200)
    self.midfc2 = nn.Linear(200, 150)
    self.attfc_as = nn.Linear(self.hidden_dim, self.hidden_dim)
    self.attfc_aw = nn.Linear(self.hidden_dim, self.hidden_dim)
    self.attfc_ad = nn.Linear(self.hidden_dim, self.hidden_dim)
    self.birnn = nn.RNN(self.hidden_dim, self.hidden_dim, batch_first=True)

    self.init_hidden(config, usegpu)

    # Register the per-article submodules so their parameters are tracked.
    self.gru_sentence_a = nn.ModuleList(self.gru_sentence_a)
    self.gru_document_a = nn.ModuleList(self.gru_document_a)
    self.attentions_a = nn.ModuleList(self.attentions_a)
    self.attentionw_a = nn.ModuleList(self.attentionw_a)

    self.svm = svm(config, usegpu)
    self.decoder = FCDecoder(config, usegpu)
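# Tiling the query vectors with torch.cat over a Python list works, but it
# allocates one intermediate tensor per batch element. A behavior-equivalent
# sketch using expand (the sizes here are assumptions for illustration):
import torch

u = torch.ones(1, 256)                      # one query vector
u_batched = u.expand(32, 256).contiguous()  # tiled across a batch of 32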
def __init__(self, config, usegpu):
    super(LSTMArticleDecoder, self).__init__()

    self.feature_len = config.getint("net", "hidden_size")
    features = config.getint("net", "hidden_size")
    self.hidden_dim = features

    task_name = config.get("data", "type_of_label").replace(" ", "").split(",")

    # Per-task output heads and intermediate transforms.
    self.outfc = []
    for x in task_name:
        self.outfc.append(nn.Linear(features, get_num_classes(x)))
    self.midfc = []
    for x in task_name:
        self.midfc.append(nn.Linear(features, features))

    # One decoder cell per task; index 0 is a placeholder so task ids start
    # at 1 (nn.ModuleList accepts None entries).
    self.cell_list = [None]
    for x in task_name:
        self.cell_list.append(
            nn.LSTMCell(config.getint("net", "hidden_size"), config.getint("net", "hidden_size")))

    # hidden_state_fc_list[a][b] transforms the hidden state passed from
    # task a to task b; cell_state_fc_list does the same for cell states.
    self.hidden_state_fc_list = []
    for a in range(len(task_name) + 1):
        arr = []
        for b in range(len(task_name) + 1):
            arr.append(nn.Linear(features, features))
        arr = nn.ModuleList(arr)
        self.hidden_state_fc_list.append(arr)

    self.cell_state_fc_list = []
    for a in range(len(task_name) + 1):
        arr = []
        for b in range(len(task_name) + 1):
            arr.append(nn.Linear(features, features))
        arr = nn.ModuleList(arr)
        self.cell_state_fc_list.append(arr)

    self.attention = Attention(config)

    self.outfc = nn.ModuleList(self.outfc)
    self.midfc = nn.ModuleList(self.midfc)
    self.cell_list = nn.ModuleList(self.cell_list)
    self.hidden_state_fc_list = nn.ModuleList(self.hidden_state_fc_list)
    self.cell_state_fc_list = nn.ModuleList(self.cell_state_fc_list)

    self.sigmoid = nn.Sigmoid()
    self.article_encoder = ArticleEncoder(config, usegpu)

    self.article_fc_list = []
    for a in range(len(task_name) + 1):
        self.article_fc_list.append(nn.Linear(features, features))
    self.article_fc_list = nn.ModuleList(self.article_fc_list)
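# The decoder steps one nn.LSTMCell per task. A minimal sketch of a single
# decoding step with the cell API it relies on (the batch and hidden sizes
# are assumptions for illustration):
import torch
import torch.nn as nn

cell = nn.LSTMCell(256, 256)
h = torch.zeros(4, 256)   # hidden state, batch of 4
c = torch.zeros(4, 256)   # cell state
x = torch.randn(4, 256)   # input features for this step
h, c = cell(x, (h, c))    # one step; h would feed the task's output head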
def forward(self, x, doc_len, config, label):
    # Split the concatenated label tensor back into one block per task.
    label_list = []
    accumulate = 0
    for a in range(len(self.task_name)):
        num = get_num_classes(self.task_name[a])
        label_list.append(label[:, accumulate:accumulate + num].float())
        accumulate += num

    outputs = []
    format_outputs = []
    for a in range(len(self.task_name)):
        document_embedding = self.combine_fc[a](self.encoder_list[a].forward(x, doc_len, config))

        # Condition task a on every earlier task b: teacher-force with the
        # gold labels during training, and use the model's own one-hot
        # predictions at inference time.
        for b in range(a):
            if self.training:
                document_embedding = document_embedding + self.mix_fc[b][a](label_list[b])
            else:
                document_embedding = document_embedding + self.mix_fc[b][a](format_outputs[b])

        output = self.out_fc[a](document_embedding)
        outputs.append(output)

        # Keep the prediction as a one-hot vector for downstream tasks.
        output = torch.max(output, dim=1)[1]
        output = one_hot(output, get_num_classes(self.task_name[a]))
        format_outputs.append(output)

    return outputs
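# one_hot is used above but not defined in this section; a plausible
# implementation, assuming it turns a batch of class indices into float
# one-hot rows:
import torch

def one_hot(indices, num_classes):
    # indices: LongTensor of shape (batch,) -> float tensor (batch, num_classes)
    out = torch.zeros(indices.size(0), num_classes, device=indices.device)
    out.scatter_(1, indices.unsqueeze(1), 1.0)
    return out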
def __init__(self, config, usegpu):
    super(FCDecoder, self).__init__()

    # Prefer the encoder's fc1_feature width; fall back to hidden_size for
    # configs that do not define it.
    try:
        features = config.getint("net", "fc1_feature")
    except configparser.NoOptionError:
        features = config.getint("net", "hidden_size")

    task_name = config.get("data", "type_of_label").replace(" ", "").split(",")

    # Per-task output heads and intermediate transforms.
    self.outfc = []
    for x in task_name:
        self.outfc.append(nn.Linear(features, get_num_classes(x)))
    self.midfc = []
    for x in task_name:
        self.midfc.append(nn.Linear(features, features))

    self.outfc = nn.ModuleList(self.outfc)
    self.midfc = nn.ModuleList(self.midfc)
    self.sigmoid = nn.Sigmoid()
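# A quick check of the fallback path: configparser raises NoOptionError when
# the section exists but the option does not, which is exactly the case the
# try/except above handles.
import configparser

_cfg = configparser.ConfigParser()
_cfg.read_string("[net]\nhidden_size = 256\n")
try:
    features = _cfg.getint("net", "fc1_feature")
except configparser.NoOptionError:
    features = _cfg.getint("net", "hidden_size")
assert features == 256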
def analyze_crit(data, config):
    # Multi-hot vector over accusations (charges).
    res = torch.from_numpy(np.zeros(get_num_classes("crit")))
    for x in data:
        if x in accusation_dict:
            res[accusation_dict[x]] = 1
    return res
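# Hypothetical usage of the analyze_* helpers: concatenate their one/multi-hot
# outputs into the flat label tensor that Pipeline.forward slices back apart.
# The variable names and task order here are assumptions for illustration:
#
# label = torch.cat([
#     analyze_crit(crit_data, config),
#     analyze_law(law_data, config),
#     analyze_time(time_data, config),
# ])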
def __init__(self, config, usegpu):
    super(NNFactArtSeq, self).__init__()

    self.data_size = config.getint("data", "vec_size")
    self.hidden_dim = config.getint("net", "hidden_size")
    self.top_k = config.getint("data", "top_k")

    # Fact branch: word-level GRU over sentences, then sentence-level GRU
    # over the document.
    self.gru_sentence_f = nn.GRU(self.data_size, self.hidden_dim, batch_first=True)
    self.gru_document_f = nn.GRU(self.hidden_dim, self.hidden_dim, batch_first=True)

    # Article branch: one sentence/document GRU pair per retrieved article.
    self.gru_sentence_a = []
    self.gru_document_a = []
    for i in range(self.top_k):
        self.gru_sentence_a.append(nn.GRU(self.data_size, self.hidden_dim, batch_first=True))
        self.gru_document_a.append(nn.GRU(self.hidden_dim, self.hidden_dim, batch_first=True))

    self.attentions_f = AttentionTanH(config)
    self.attentionw_f = AttentionTanH(config)
    self.attentions_a = []
    self.attentionw_a = []
    for i in range(self.top_k):
        self.attentions_a.append(AttentionTanH(config))
        self.attentionw_a.append(AttentionTanH(config))
    self.attention_a = AttentionTanH(config)

    # task_name = config.get("data", "type_of_label").replace(" ", "").split(",")[0]
    # self.outfc = nn.Linear(150, get_num_classes(task_name))
    self.midfc1 = nn.Linear(self.hidden_dim * 2, self.hidden_dim * 2)
    self.midfc2 = nn.Linear(self.hidden_dim * 2, self.hidden_dim)
    self.attfc_as = nn.Linear(self.hidden_dim, self.hidden_dim)
    self.attfc_aw = nn.Linear(self.hidden_dim, self.hidden_dim)
    self.attfc_ad = nn.Linear(self.hidden_dim, self.hidden_dim)
    self.birnn = nn.RNN(self.hidden_dim, self.hidden_dim, batch_first=True)

    self.init_hidden(config, usegpu)

    # Register the per-article submodules so their parameters are tracked.
    self.gru_sentence_a = nn.ModuleList(self.gru_sentence_a)
    self.gru_document_a = nn.ModuleList(self.gru_document_a)
    self.attentions_a = nn.ModuleList(self.attentions_a)
    self.attentionw_a = nn.ModuleList(self.attentionw_a)

    task_name = config.get("data", "type_of_label").replace(" ", "").split(",")

    # Per-task output heads and intermediate transforms.
    self.outfc = []
    for x in task_name:
        self.outfc.append(nn.Linear(self.hidden_dim, get_num_classes(x)))
    self.midfc = []
    for x in task_name:
        self.midfc.append(nn.Linear(self.hidden_dim, self.hidden_dim))

    # One decoder cell per task; index 0 is a placeholder so task ids start at 1.
    self.cell_list = [None]
    for x in task_name:
        self.cell_list.append(nn.LSTMCell(self.hidden_dim, self.hidden_dim))

    self.outfc = nn.ModuleList(self.outfc)
    self.midfc = nn.ModuleList(self.midfc)

    # hidden_state_fc_list[a][b] transforms the hidden state passed from
    # task a to task b; cell_state_fc_list does the same for cell states.
    self.hidden_state_fc_list = []
    for a in range(len(task_name) + 1):
        arr = []
        for b in range(len(task_name) + 1):
            arr.append(nn.Linear(self.hidden_dim, self.hidden_dim))
        arr = nn.ModuleList(arr)
        self.hidden_state_fc_list.append(arr)

    self.cell_state_fc_list = []
    for a in range(len(task_name) + 1):
        arr = []
        for b in range(len(task_name) + 1):
            arr.append(nn.Linear(self.hidden_dim, self.hidden_dim))
        arr = nn.ModuleList(arr)
        self.cell_state_fc_list.append(arr)

    self.cell_list = nn.ModuleList(self.cell_list)
    self.hidden_state_fc_list = nn.ModuleList(self.hidden_state_fc_list)
    self.cell_state_fc_list = nn.ModuleList(self.cell_state_fc_list)