Example #1
def split_train_test(new_databunch, split_value):
    """Split a databunch into a training part and a test part.

    The leading (1 - split_value) fraction of the records becomes the
    training set and the trailing split_value fraction the test set.
    Both .contents and .accu are sliced at the same cut point so the
    two sequences stay aligned.
    # NOTE(review): assumes .contents and .accu have equal length — confirm.
    """
    total = len(new_databunch.contents)
    cut = int((1 - split_value) * total)
    train_bunch, test_bunch = Bunch(), Bunch()
    train_bunch.contents = new_databunch.contents[:cut]
    train_bunch.accu = new_databunch.accu[:cut]
    test_bunch.contents = new_databunch.contents[cut:]
    test_bunch.accu = new_databunch.accu[cut:]
    return train_bunch, test_bunch
Example #2
def tfidfspace(bunch_file, tfidf_file, train_bunch_file=None):
    """Build a TF-IDF feature space from a pickled Bunch and persist it.

    Args:
        bunch_file: path of the pickled source Bunch (provides .label
            and .contents).
        tfidf_file: path where the resulting TF-IDF Bunch is pickled.
        train_bunch_file: when given, path of the pickled training-side
            TF-IDF Bunch whose vocabulary is reused so the test feature
            columns line up with the training feature columns.
    """
    # BUG FIX: the Bunch used to be created with a 'labels' field that was
    # never filled, while a separate 'label' attribute was assigned below.
    # Declare 'label' so the initialized fields match what is populated.
    tfidfbunch = Bunch(label=[], contents=[], tdm=[], vocabulary={})
    # Read the source bunch and carry its labels and contents over.
    with open(bunch_file, "rb") as f:
        bunch = pickle.load(f)
    tfidfbunch.label = bunch.label
    tfidfbunch.contents = bunch.contents
    if train_bunch_file is None:
        # Training data: fit a fresh vectorizer and keep its vocabulary.
        vectorizer = TfidfVectorizer(max_df=0.4, sublinear_tf=True)
        tfidfbunch.tdm = vectorizer.fit_transform(bunch.contents)
        tfidfbunch.vocabulary = vectorizer.vocabulary_
    else:
        # Test data: reuse the training vocabulary so both spaces share
        # the same word -> column mapping.
        with open(train_bunch_file, "rb") as f:
            train_bunch = pickle.load(f)
        tfidfbunch.vocabulary = train_bunch.vocabulary
        vectorizer = TfidfVectorizer(max_df=0.4,
                                     sublinear_tf=True,
                                     vocabulary=train_bunch.vocabulary)
        tfidfbunch.tdm = vectorizer.fit_transform(bunch.contents)
    # Write the TF-IDF bunch out.
    with open(tfidf_file, "wb") as f:
        pickle.dump(tfidfbunch, f)
    # Persist the fitted vectorizer model as well.
    # NOTE(review): this writes to the module-level TFIDF_FILE constant,
    # not to the tfidf_file argument — confirm that is intentional.
    joblib.dump(vectorizer, TFIDF_FILE)
                # NOTE(review): fragment — the enclosing loops and the leading
                # branches of this elif chain start before the visible region.
                elif children.tag == 'contenttitle':
                    contenttitle = children.text
                elif children.tag == 'content':
                    # Prepend the most recently seen title to the body text.
                    content = str(contenttitle)+' '+str(children.text)
                    if (len(content) > 0):
                        # Segment the text with jieba (precise mode) and
                        # store it space-joined for the vectorizer.
                        seg = jieba.cut(content, cut_all=False)
                        bunch.contents.append(' '.join(seg))
                    else:
                        # NOTE(review): str(x)+' '+str(y) always has length
                        # >= 1 (even for None), so this branch looks
                        # unreachable — confirm intent.
                        bunch.contents.append('null')
        print('finish train file:',filePath)
# Persist the training bunch assembled above.
fileutils.saveBatchObj(trainRawPath, bunch)

# Parse all test data and collect it into a fresh bunch.
# NOTE(review): 'lable' is a typo — tfidfspace elsewhere in this file reads
# bunch.label; confirm which spelling downstream consumers actually use
# before fixing.
bunch.lable=[]
bunch.filenames=[]
bunch.contents=[]
contenttitle =''
# NOTE(review): this loop is truncated — its body continues past the
# visible region (presumably with the same elif chain as the train loop).
for file in os.listdir(testDataPath):
    filePath = testDataPath + os.sep + file
    if os.path.isdir(filePath):
        # Skip sub-directories; only plain files are parsed.
        print(file, ' is dir. continue')
        continue
    # NOTE(review): the 'with' target shadows the loop variable 'file';
    # harmless as written, but worth renaming.
    with open(filePath, 'r') as file:
        text = file.read()
        # Strip control characters plus '|' and '&' that would break
        # XML parsing of the raw corpus file.
        text = re.sub(u"[\x00-\x08\x0b-\x0c\x0e-\x1f|&]+", u"", text)
        root = ET.fromstring(text)
        for child in root:
            # Second-level nodes: descend into the third level of the XML.
            for children in child:
                # Third-level node: record the source file for this entry.
                bunch.filenames.append(filePath)