Ejemplo n.º 1
0
import torch
import os
import torch.nn as nn
import numpy as np
import time

from model import textCNN
import sen2inds
import get_wordlists

# Lookup tables loaded at import time. Judging by the names these are
# word<->index and label<->index mappings -- TODO confirm against
# get_wordlists / sen2inds.
word2ind, ind2word = get_wordlists.get_worddict()
# Raw string: the Windows-style path contains backslashes that are not valid
# escape sequences (`\m`, `\l`). A plain literal only works by accident and
# triggers a SyntaxWarning on Python 3.12+. The runtime value is unchanged.
label_w2n, label_n2w = sen2inds.read_labelFile(
    r'textCNN_chinese_CrosssValidation\model_save\label.txt')

# Model hyper-parameters. `vocab_size` and `class_num` are derived from the
# lookup tables loaded above so they always match the data on disk.
textCNN_param = {
    'vocab_size': len(word2ind),
    'embed_dim': 100,
    'class_num': len(label_w2n),
    "kernel_num": 100,
    "kernel_size": [3, 4, 5],
    "dropout": 0.5,
}


def get_testData(file):
    """Return the non-empty lines of a text file.

    Args:
        file: Path of the file to read. It is opened in text mode with the
            platform default encoding, exactly as before.

    Returns:
        A list with one string per line, split on ``'\\n'``, with empty
        strings removed.
    """
    # The context manager closes the handle deterministically; the previous
    # bare open(...).read() left that to the garbage collector.
    with open(file, 'r') as handle:
        content = handle.read()

    return [line for line in content.split('\n') if line]

Ejemplo n.º 2
0
# coding=utf-8

import torch
import os
import torch.nn as nn
import numpy as np
import time
from classifier.nets.textcnn import textCNN
import sen2inds
import textCNN_data

# Load the lookup tables from the files in the working directory.
word2ind, ind2word = sen2inds.get_worddict("wordLabel.txt")
label_w2n, label_n2w = sen2inds.read_labelFile("label.txt")

# Model settings; the vocabulary and class counts follow the loaded tables.
textCNN_param = dict(
    vocab_size=len(word2ind),
    embed_dim=60,
    class_num=len(label_w2n),
    kernel_num=16,
    kernel_size=[3, 4, 5],
    dropout=0.5,
)
# Settings handed to textCNN_data.textCNN_dataLoader below.
dataLoader_param = dict(batch_size=128, shuffle=True)

# Initialise the dataset objects.
print("init dataset...")
dataLoader = textCNN_data.textCNN_dataLoader(dataLoader_param)
valdata = textCNN_data.get_valdata()
Ejemplo n.º 3
0
import torch
import os
import torch.nn as nn
import numpy as np
import time

from model import textCNN
import sen2inds
import get_wordlists

# Lookup tables loaded at import time. Judging by the names these are
# word<->index and label<->index mappings -- TODO confirm against
# get_wordlists / sen2inds.
word2ind, ind2word = get_wordlists.get_worddict()
# Raw string: the original literal mixed invalid escapes (`\p`, `\w`, `\B`,
# `\m`, `\l`) with one deliberate `\\t`, which is fragile and triggers a
# SyntaxWarning on Python 3.12+. The runtime value is unchanged.
# NOTE(review): this is an absolute, machine-specific path.
label_w2n, label_n2w = sen2inds.read_labelFile(
    r'D:\pathon\work\BERTProject-master\textCNN_chinese\model_save\label.txt')

# Model hyper-parameters. `vocab_size` and `class_num` are derived from the
# lookup tables loaded above so they always match the data on disk.
textCNN_param = {
    'vocab_size': len(word2ind),
    'embed_dim': 50,
    'class_num': len(label_w2n),
    "kernel_num": 20,
    "kernel_size": [3, 4, 5],
    "dropout": 0.5,
}


def get_testData(file):
    """Return the non-empty lines of a text file.

    Args:
        file: Path of the file to read. It is opened in text mode with the
            platform default encoding, exactly as before.

    Returns:
        A list with one string per line, split on ``'\\n'``, with empty
        strings removed.
    """
    # The context manager closes the handle deterministically; the previous
    # bare open(...).read() left that to the garbage collector.
    with open(file, 'r') as handle:
        content = handle.read()

    return [line for line in content.split('\n') if line]

Ejemplo n.º 4
0
import torch.nn as nn
import numpy as np
import time
import matplotlib.pyplot as plt
from model import textCNN
import sen2inds
import textCNN_data

# Locations of the vectorised train/test data and the saved weights.
trainDataVecFile = "data/txt/train_data_vec.txt"
testDataVecFile = "data/txt/test_data_vec.txt"
weightFile = "data/pkl/weight.pkl"

# word2ind: character as key, index as value.
# ind2word: index as key, character as value.
word2ind, ind2word = sen2inds.get_worddict("data/txt/train_wordLabel.txt")
# label_w2n: string (sub-board name) as key, index as value.
# label_n2w: index as key, string (sub-board name) as value.
label_w2n, label_n2w = sen2inds.read_labelFile("data/txt/train_label.txt")

# Model settings; the vocabulary and class counts follow the loaded tables.
textCNN_param = dict(
    vocab_size=len(word2ind),
    embed_dim=60,
    class_num=len(label_w2n),
    kernel_num=16,
    kernel_size=[3, 4, 5],
    dropout=0.5,
)
# Batch size and shuffle flag kept alongside the model settings.
dataLoader_param = dict(batch_size=128, shuffle=True)

Ejemplo n.º 5
0
import torch
import os
import torch.nn as nn
import numpy as np
import time

from model import textCNN
import sen2inds
import textCNN_data
import get_wordlists

# Lookup tables loaded at import time. Judging by the names these are
# word<->index and label<->index mappings -- TODO confirm against
# get_wordlists / sen2inds.
word2ind, ind2word = get_wordlists.get_worddict()
# Raw string: the Windows-style path contains backslashes that are not valid
# escape sequences (`\m`, `\l`). A plain literal only works by accident and
# triggers a SyntaxWarning on Python 3.12+. The runtime value is unchanged.
label_w2n, label_n2w = sen2inds.read_labelFile(
    r'textCNN_chinese\model_save\label.txt')

# Model hyper-parameters. `vocab_size` and `class_num` are derived from the
# lookup tables loaded above so they always match the data on disk.
textCNN_param = {
    'vocab_size': len(word2ind),
    'embed_dim': 100,
    'class_num': len(label_w2n),
    "kernel_num": 100,
    "kernel_size": [3, 4, 5],
    "dropout": 0.5,
}

# Batch size and shuffle flag kept alongside the model settings.
dataLoader_param = {
    'batch_size': 50,
    'shuffle': True,
}


def main():