def split_train_test(new_databunch, split_value):
    """Split a data bunch into train/test parts, holding out `split_value` of it for testing."""
    length = len(new_databunch.contents)           # total number of samples
    split_point = int((1 - split_value) * length)  # index where the test split starts
    trainbunch = Bunch()
    testbunch = Bunch()
    trainbunch.contents = new_databunch.contents[:split_point]
    testbunch.contents = new_databunch.contents[split_point:]
    trainbunch.accu = new_databunch.accu[:split_point]
    testbunch.accu = new_databunch.accu[split_point:]
    return trainbunch, testbunch
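# Usage sketch (not from the original source): build a tiny Bunch with parallel
# `contents` and `accu` lists and hold out 30% of it as the test split. The demo
# data below is made up purely for illustration.
_demo = Bunch()
_demo.contents = ['doc one', 'doc two', 'doc three', 'doc four', 'doc five']
_demo.accu = ['A', 'B', 'A', 'B', 'A']
_train, _test = split_train_test(_demo, split_value=0.3)
print(len(_train.contents), len(_test.contents))  # 3 2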
def tfidfspace(bunch_file, tfidf_file, train_bunch_file=None):
    tfidfbunch = Bunch(label=[], contents=[], tdm=[], vocabulary={})
    # Load the bunch from bunch_file and copy its labels and contents into tfidfbunch.
    with open(bunch_file, "rb") as f:
        bunch = pickle.load(f)
    tfidfbunch.label = bunch.label
    tfidfbunch.contents = bunch.contents
    if train_bunch_file is None:
        # Training data: fit the TF-IDF space and keep the fitted vocabulary.
        vectorizer = TfidfVectorizer(max_df=0.4, sublinear_tf=True)
        tfidfbunch.tdm = vectorizer.fit_transform(bunch.contents)
        tfidfbunch.vocabulary = vectorizer.vocabulary_
    else:
        # Test data: reuse the training vocabulary so both sets share the same feature space.
        with open(train_bunch_file, "rb") as f:
            train_bunch = pickle.load(f)
        tfidfbunch.vocabulary = train_bunch.vocabulary
        vectorizer = TfidfVectorizer(max_df=0.4, sublinear_tf=True,
                                     vocabulary=train_bunch.vocabulary)
        tfidfbunch.tdm = vectorizer.fit_transform(bunch.contents)
    # Write tfidfbunch to tfidf_file.
    with open(tfidf_file, "wb") as f:
        pickle.dump(tfidfbunch, f)
    # Persist the fitted TF-IDF vectorizer.
    joblib.dump(vectorizer, TFIDF_FILE)
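# Usage sketch (not from the original source; TRAIN_BUNCH, TRAIN_TFIDF, TEST_BUNCH and
# TEST_TFIDF are hypothetical path names): the first call fits the vocabulary on the
# training bunch, and the second call reuses that vocabulary so the train and test
# matrices share the same feature columns. TFIDF_FILE (the joblib dump target above)
# is assumed to be a module-level constant defined elsewhere in this project.
#
#   tfidfspace(TRAIN_BUNCH, TRAIN_TFIDF)
#   tfidfspace(TEST_BUNCH, TEST_TFIDF, train_bunch_file=TRAIN_TFIDF)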
            elif children.tag == 'contenttitle':
                contenttitle = children.text
            elif children.tag == 'content':
                content = str(contenttitle) + ' ' + str(children.text)
                if len(content) > 0:
                    # Segment the title + body with jieba before storing it.
                    seg = jieba.cut(content, cut_all=False)
                    bunch.contents.append(' '.join(seg))
                else:
                    bunch.contents.append('null')
    print('finish train file:', filePath)

fileutils.saveBatchObj(trainRawPath, bunch)

# Parse all test data and save it to a bunch.
bunch.label = []
bunch.filenames = []
bunch.contents = []
contenttitle = ''
for file in os.listdir(testDataPath):
    filePath = testDataPath + os.sep + file
    if os.path.isdir(filePath):
        print(file, ' is dir. continue')
        continue
    with open(filePath, 'r') as f:
        text = f.read()
    # Strip control characters, '|' and '&' so the text parses as well-formed XML.
    text = re.sub(u"[\x00-\x08\x0b-\x0c\x0e-\x1f|&]+", u"", text)
    root = ET.fromstring(text)
    for child in root:           # second-level nodes; descend into the third level of the XML document
        for children in child:   # third-level nodes (tag name and attributes)
            bunch.filenames.append(filePath)
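# Sketch of how the test-file loop presumably continues (this part is not shown in the
# excerpt above): the inner branches would mirror the training branches, and
# `testRawPath` is a hypothetical output path, named by analogy with `trainRawPath`.
#
#             if children.tag == 'contenttitle':
#                 contenttitle = children.text
#             elif children.tag == 'content':
#                 content = str(contenttitle) + ' ' + str(children.text)
#                 if len(content) > 0:
#                     seg = jieba.cut(content, cut_all=False)
#                     bunch.contents.append(' '.join(seg))
#                 else:
#                     bunch.contents.append('null')
#     print('finish test file:', filePath)
# fileutils.saveBatchObj(testRawPath, bunch)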