Example #1
def wikisplit2word():
    # Segment the extracted Chinese Wikipedia corpus with jieba and
    # append the space-delimited tokens to totalpart.txt.
    if os.path.exists(config.CORPUS_DIC + '/wiki_chs'):
        with open(config.PREDATA_DIC + '/totalpart.txt', 'a',
                  encoding='utf-8') as write_file:
            print('Word segmentation started')
            for line in __bigfile.get_lines(config.CORPUS_DIC + '/wiki_chs'):
                if line:
                    write_file.write(' '.join(jieba.lcut(line)))
            print('Word segmentation finished')
    else:
        raise FileNotFoundError('{} does not exist'.format(config.CORPUS_DIC +
                                                           '/wiki_chs'))
Example #2
def othersplit2word(filepath: str):
    # Segment an arbitrary corpus file with jieba and append the
    # space-delimited tokens to a same-named file under PREDATA_DIC.
    if os.path.exists(filepath):
        with open(config.PREDATA_DIC + '/' + filepath.split('/')[-1],
                  'a',
                  encoding='utf-8') as write_file:
            print('Word segmentation started')
            for line in __bigfile.get_lines(filepath):
                if line:
                    write_file.write(' '.join(jieba.lcut(line)))
            print('Word segmentation finished')
    else:
        raise FileNotFoundError('{} does not exist'.format(filepath))
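Both segmentation helpers above stream their input through a project-internal reader, __bigfile.get_lines, whose source is not shown on this page. A minimal sketch of the kind of lazy line reader it presumably resembles (the function name matches the call sites; everything else is an assumption):

def get_lines(filepath: str, encoding: str = 'utf-8'):
    # Hypothetical stand-in for __bigfile.get_lines: yield one stripped
    # line at a time so large corpora never have to fit in memory.
    # Python file objects already iterate lazily, line by line.
    with open(filepath, 'r', encoding=encoding) as read_file:
        for line in read_file:
            yield line.strip()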
Example #3
def deal_tagdata(tagdata_filepaths: list, rate: float = config.SR_RATE):
    datas = []
    for tagdata_filepath in tagdata_filepaths:
        if os.path.exists(tagdata_filepath):
            for line in __bigfile.get_lines(tagdata_filepath):
                datas.append(line)
        else:
            raise FileNotFoundError('{} tagged-data file does not exist'.format(tagdata_filepath))

    random.shuffle(datas)  # shuffle the data

    sentences, labels = __split_tagdata(datas)

    datas.clear()

    words_list = __tagsentence2regwords(sentences)

    sentences.clear()

    sentencevec_list, labelvec_list = __data2vec(words_list, labels)

    words_list.clear()
    labels.clear()

    # save the processed data
    total_size = len(labelvec_list)

    train_x = sentencevec_list[:int(total_size * rate)]
    train_y = labelvec_list[:int(total_size * rate)]
    test_x = sentencevec_list[int(total_size * rate):]
    test_y = labelvec_list[int(total_size * rate):]

    sentencevec_list.clear()
    labelvec_list.clear()

    if rate == 1.0:
        # special case: everything goes to the training split
        if len(train_x) > 0:
            np.save(config.PREDATA_DIC + '/strain_x.npy', np.array(train_x))
            np.save(config.PREDATA_DIC + '/strain_y.npy', np.array(train_y))
        else:
            raise ValueError('rate is 1.0 but the data length is 0')

    elif rate == 0.0:
        # special case: everything goes to the test split
        if len(test_x) > 0:
            np.save(config.PREDATA_DIC + '/stest_x.npy', np.array(test_x))
            np.save(config.PREDATA_DIC + '/stest_y.npy', np.array(test_y))
        else:
            raise ValueError('rate is 0.0 but the data length is 0')

    elif 0.0 < rate < 1.0:
        train_size = len(train_x)
        test_size = len(test_x)

        if train_size <= 0 or test_size <= 0:
            raise ValueError('data length is 0')

        # normal case: save both the training and test splits
        np.save(config.PREDATA_DIC + '/strain_x.npy', np.array(train_x))
        np.save(config.PREDATA_DIC + '/strain_y.npy', np.array(train_y))
        np.save(config.PREDATA_DIC + '/stest_x.npy', np.array(test_x))
        np.save(config.PREDATA_DIC + '/stest_y.npy', np.array(test_y))

    else:
        raise ValueError('rate out of range; rate must be between 0.0 and 1.0, got rate: {}'.format(rate))
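A hedged usage sketch for deal_tagdata: a fractional rate writes all four .npy files, while the two boundary values write only one split (the file path below is a placeholder, not from the project):

# Hypothetical calls; 'data/tagged_resumes.txt' is a placeholder path.
deal_tagdata(['data/tagged_resumes.txt'])            # default config.SR_RATE split
deal_tagdata(['data/tagged_resumes.txt'], rate=1.0)  # training split only
deal_tagdata(['data/tagged_resumes.txt'], rate=0.0)  # test split only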
Example #4
File: __tag.py Project: jlinka/graduate
def __get_sentences_generator(filepath: str):
    for resume in __bigfile.get_lines(filepath):
        yield __splitsentence.resume2sentences(resume)
Example #5
File: __tag.py Project: jlinka/graduate
def __get_inputs_generator(filepath: str):
    for resume in __bigfile.get_lines(filepath):
        sentences = __splitsentence.resume2sentences(resume)
        words_list = srpre.sentence2regwords(sentences)
        yield srpre.sentence2vec(words_list)
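A sketch of how these private generators might be consumed inside __tag.py; the model object and its predict method are assumptions, not part of the code shown here:

def __tag_resumes(filepath: str, model):
    # Hypothetical driver: walk the two generators in lockstep (each
    # re-reads the file) and pair every resume's sentences with the
    # model's predictions over the vectorized inputs.
    for sentences, inputs in zip(__get_sentences_generator(filepath),
                                 __get_inputs_generator(filepath)):
        labels = model.predict(inputs)  # `model.predict` is an assumption
        yield list(zip(sentences, labels))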