import os
import time

import numpy as np
import torch
import torch.nn as nn

from model import textCNN
import sen2inds
import get_wordlists

# Vocabulary lookup tables as returned by get_wordlists.get_worddict().
word2ind, ind2word = get_wordlists.get_worddict()

# Label lookup tables read from the label file. The path is a raw string so
# the backslashes stay literal instead of being parsed as (invalid) escape
# sequences; the resulting runtime value is unchanged.
label_w2n, label_n2w = sen2inds.read_labelFile(
    r'textCNN_chinese_CrosssValidation\model_save\label.txt')

# Model configuration; vocabulary size and class count are derived from the
# lookup tables loaded above.
# NOTE(review): presumably consumed by the textCNN constructor -- confirm.
textCNN_param = {
    'vocab_size': len(word2ind),
    'embed_dim': 100,
    'class_num': len(label_w2n),
    "kernel_num": 100,
    "kernel_size": [3, 4, 5],
    "dropout": 0.5,
}


def get_testData(file):
    """Return the non-empty lines of a text file.

    Args:
        file: Path of the file to read.

    Returns:
        A list with one string per line of the file, split on '\\n', with
        empty strings removed.
    """
    # The context manager closes the handle even if read() raises.
    # NOTE(review): the file is opened with the platform default encoding;
    # confirm that matches how the data files were written.
    with open(file, 'r') as handle:
        content = handle.read()
    return [line for line in content.split('\n') if line]
# coding=utf-8
import os
import time

import numpy as np
import torch
import torch.nn as nn

from classifier.nets.textcnn import textCNN
import sen2inds
import textCNN_data

# Vocabulary and label lookup tables loaded from their text files.
word2ind, ind2word = sen2inds.get_worddict('wordLabel.txt')
label_w2n, label_n2w = sen2inds.read_labelFile('label.txt')

# Model configuration; vocabulary size and class count are derived from the
# lookup tables loaded above.
# NOTE(review): presumably consumed by the textCNN constructor -- confirm.
textCNN_param = {
    'vocab_size': len(word2ind),
    'embed_dim': 60,
    'class_num': len(label_w2n),
    "kernel_num": 16,
    "kernel_size": [3, 4, 5],
    "dropout": 0.5,
}

# Options handed to textCNN_data.textCNN_dataLoader below.
dataLoader_param = {
    'batch_size': 128,
    'shuffle': True,
}

# init dataset
print('init dataset...')
dataLoader = textCNN_data.textCNN_dataLoader(dataLoader_param)
valdata = textCNN_data.get_valdata()
import os
import time

import numpy as np
import torch
import torch.nn as nn

from model import textCNN
import sen2inds
import get_wordlists

# Vocabulary lookup tables as returned by get_wordlists.get_worddict().
word2ind, ind2word = get_wordlists.get_worddict()

# Label lookup tables read from the label file. The path is a raw string so
# every backslash stays literal instead of relying on invalid escape
# sequences (\p, \w, \B, \m, \l); the resulting runtime value is unchanged.
label_w2n, label_n2w = sen2inds.read_labelFile(
    r'D:\pathon\work\BERTProject-master\textCNN_chinese\model_save\label.txt')

# Model configuration; vocabulary size and class count are derived from the
# lookup tables loaded above.
# NOTE(review): presumably consumed by the textCNN constructor -- confirm.
textCNN_param = {
    'vocab_size': len(word2ind),
    'embed_dim': 50,
    'class_num': len(label_w2n),
    "kernel_num": 20,
    "kernel_size": [3, 4, 5],
    "dropout": 0.5,
}


def get_testData(file):
    """Return the non-empty lines of a text file.

    Args:
        file: Path of the file to read.

    Returns:
        A list with one string per line of the file, split on '\\n', with
        empty strings removed.
    """
    # The context manager closes the handle even if read() raises.
    # NOTE(review): the file is opened with the platform default encoding;
    # confirm that matches how the data files were written.
    with open(file, 'r') as handle:
        content = handle.read()
    return [line for line in content.split('\n') if line]
import time

import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn

from model import textCNN
import sen2inds
import textCNN_data

trainDataVecFile = 'data/txt/train_data_vec.txt'
testDataVecFile = 'data/txt/test_data_vec.txt'

# word2ind dict: character as key, index as value
# ind2word dict: index as key, character as value
word2ind, ind2word = sen2inds.get_worddict('data/txt/train_wordLabel.txt')

# label_w2n dict: label string (sub-board name) as key, index as value
# label_n2w dict: index as key, label string (sub-board name) as value
label_w2n, label_n2w = sen2inds.read_labelFile('data/txt/train_label.txt')

weightFile = 'data/pkl/weight.pkl'

# Model configuration; vocabulary size and class count are derived from the
# lookup tables loaded above.
# NOTE(review): presumably consumed by the textCNN constructor -- confirm.
textCNN_param = {
    'vocab_size': len(word2ind),
    'embed_dim': 60,
    'class_num': len(label_w2n),
    "kernel_num": 16,
    "kernel_size": [3, 4, 5],
    "dropout": 0.5,
}

# NOTE(review): presumably passed to the data loader in textCNN_data -- confirm.
dataLoader_param = {
    'batch_size': 128,
    'shuffle': True,
}
import torch import os import torch.nn as nn import numpy as np import time from model import textCNN import sen2inds import textCNN_data import get_wordlists word2ind, ind2word = get_wordlists.get_worddict() label_w2n, label_n2w = sen2inds.read_labelFile( 'textCNN_chinese\model_save\label.txt') textCNN_param = { 'vocab_size': len(word2ind), 'embed_dim': 100, 'class_num': len(label_w2n), "kernel_num": 100, "kernel_size": [3, 4, 5], "dropout": 0.5, } dataLoader_param = { 'batch_size': 50, 'shuffle': True, } def main():