def run(args):
    """Vectorize input sentences and persist train/test splits.

    Reads JSON-lines records from every file in ``args.input``, vectorizes
    them, splits into train/test sets using the ``train`` section of CONFIG,
    and pickles both partitions (train first, then test) to ``args.output``.
    """
    records = chain.from_iterable(read_json_lines(path) for path in args.input)
    features, labels = zip(*list(vectorize_sentences(records)))
    train_cfg = CONFIG['train']
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=train_cfg['test_size'],
        random_state=train_cfg['random_state'])
    # Train partition is written first, test second -- consumers must
    # unpickle in the same order.
    pickle.dump((X_train, y_train), args.output)
    pickle.dump((X_test, y_test), args.output)
def run(args):
    """Vectorize input sentences with an enumerator and pickle 80/20 splits.

    Reads JSON-lines records from every file in ``args.input``, vectorizes
    them through a fresh enumerator, splits 80/20 with a fixed seed, and
    pickles the train then test partitions to ``args.output``.
    """
    enum = enumerator()
    records = chain.from_iterable(read_json_lines(path) for path in args.input)
    features, labels = zip(*list(vectorize_sentences(enum, records)))
    # Fixed split ratio and seed keep runs reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=0)
    pickle.dump((X_train, y_train), args.output)
    pickle.dump((X_test, y_test), args.output)
def do_reducer(args):
    """Collect NDJSON records into a DataFrame, write a CSV summary, and plot."""
    import pandas as pd

    frame = pd.DataFrame.from_dict(ndjson2col(read_json_lines(args.input)))
    summary_path = os.path.join(args.output, "summary.csv")
    logging.info("Writing brief summary to %s", summary_path)
    frame.to_csv(summary_path)
    create_plots(args, frame)
def sample_by_y(args):
    """Load labeled sentences, optionally downsampling per label value.

    Reads JSON-lines records from ``args.sentences``; when the ``train``
    config declares ``sample_labeled`` counts, caps how many records are
    kept per distinct "Y" value via reservoir sampling.

    Returns:
        Tuple ``(sentences, y_labels)`` where ``y_labels`` is a float
        numpy array.
    """
    records = chain.from_iterable(read_json_lines(path)
                                  for path in args.sentences)
    train_cfg = CONFIG['train']
    per_label_caps = train_cfg.get('sample_labeled')
    if per_label_caps:
        # Bound the number of records retained for each label value.
        records = reservoir_dict(records, "Y", per_label_caps,
                                 random_state=train_cfg['random_state'])
    pairs = [(record['X'], record['Y']) for record in records]
    sentences, raw_labels = zip(*pairs)
    return sentences, np.array(raw_labels, dtype=float)
def do_reducer(args):
    """Summarize NDJSON metrics: subset the columns, write CSV, and plot."""
    import pandas as pd

    frame = pd.DataFrame.from_dict(ndjson2col(read_json_lines(args.input)))
    # Restrict to the grouping/axis/trial columns plus requested metrics.
    wanted_columns = [args.group_by, args.x_axis, args.trial] + args.metrics
    subset = get_df_subset(frame, wanted_columns)
    summary_path = os.path.join(args.output, "summary.csv")
    logging.info("Writing brief summary to %s", summary_path)
    subset.to_csv(summary_path)
    create_plots(args, subset, args.metrics)
def run(args):
    """Vectorize enumerated input sentences and pickle an 80/20 split.

    All JSON-lines files named in ``args.input`` are chained together,
    vectorized with a new enumerator, split deterministically, and the
    (train, test) partitions are pickled in that order to ``args.output``.
    """
    enum = enumerator()
    source = chain.from_iterable(read_json_lines(name) for name in args.input)
    pairs = list(vectorize_sentences(enum, source))
    X, y = zip(*pairs)
    # Deterministic split: fixed 20% holdout, fixed random seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)
    pickle.dump((X_train, y_train), args.output)
    pickle.dump((X_test, y_test), args.output)
def get_data(args):
    """Assemble the feature matrix and sentiment labels for training.

    Depending on ``CONFIG['train']['features']``, features are bag-of-words,
    word2vec/doc2vec embedding vectors, or a mix of both.

    Returns:
        Tuple ``(is_mixed, (X, y))`` -- ``is_mixed`` is True only when both
        bow and embedding features are combined.

    Raises:
        RuntimeError: if an embedding is required but ``--embedding`` was not
            supplied, or the configuration names an unknown pretraining
            algorithm or feature set.
    """
    feature_set_names = CONFIG['train']['features']
    if set(feature_set_names).intersection(['word2vec', 'doc2vec']) \
            and not args.embedding:
        raise RuntimeError("--embedding argument must be supplied")

    # get Y labels
    training_set = read_tsv(args.train)
    y_labels = training_set["sentiment"]

    sentences = [obj['review'] for obj in read_json_lines(args.sentences)]
    if not args.embedding or feature_set_names == ['bow']:
        # don't drop NaNs -- have a sparse matrix here
        return False, (get_bow_features(sentences), y_labels)

    # load embedding
    algorithm = CONFIG['pretrain']['algorithm']
    if algorithm == 'word2vec':
        embedding = word2vec.Word2Vec.load(args.embedding)
    elif algorithm == 'glove':
        embedding = Glove.load(args.embedding)
        # dynamically add GloveWrapper mixin
        embedding.__class__ = type('MyGlove', (Glove, GloveWrapper), {})
    else:
        # Previously fell through with `embedding` unbound, producing a
        # confusing NameError below; fail fast with a clear message instead.
        raise RuntimeError("Invalid config setting pretrain:algorithm=%s"
                           % algorithm)

    # get feature vectors
    if 'doc2vec' in feature_set_names:
        embedding_vectors = get_doc2vec_features(sentences, embedding)
    elif 'word2vec' in feature_set_names:
        embedding_vectors = get_word2vec_features(sentences, embedding)
    else:
        raise RuntimeError("Invalid config setting train:features=%s"
                           % feature_set_names)

    if 'bow' in feature_set_names:
        return True, get_mixed_features(sentences, embedding_vectors, y_labels)
    else:
        # matrix is dense -- drop NaNs
        return False, drop_nans(embedding_vectors, y_labels)
def get_data(args):
    """Build (features, labels) for training from reviews and config.

    Selects bag-of-words, embedding-based (word2vec/doc2vec), or mixed
    features according to ``CONFIG['train']['features']``.

    Returns:
        Tuple ``(is_mixed, (X, y))``; ``is_mixed`` is True only for the
        combined bow + embedding case.

    Raises:
        RuntimeError: when ``--embedding`` is missing but required, or the
            config specifies an unrecognized algorithm or feature set.
    """
    feature_set_names = CONFIG['train']['features']
    if set(feature_set_names).intersection(['word2vec', 'doc2vec']) \
            and not args.embedding:
        raise RuntimeError("--embedding argument must be supplied")

    # get Y labels
    training_set = read_tsv(args.train)
    y_labels = training_set["sentiment"]

    sentences = [obj['review'] for obj in read_json_lines(args.sentences)]
    if not args.embedding or feature_set_names == ['bow']:
        # don't drop NaNs -- have a sparse matrix here
        return False, (get_bow_features(sentences), y_labels)

    # load embedding
    algorithm = CONFIG['pretrain']['algorithm']
    if algorithm == 'word2vec':
        embedding = word2vec.Word2Vec.load(args.embedding)
    elif algorithm == 'glove':
        embedding = Glove.load(args.embedding)
        # dynamically add GloveWrapper mixin
        embedding.__class__ = type('MyGlove', (Glove, GloveWrapper), {})
    else:
        # Bug fix: an unknown algorithm previously left `embedding` unbound,
        # surfacing later as a NameError; raise a clear error here instead.
        raise RuntimeError("Invalid config setting pretrain:algorithm=%s"
                           % algorithm)

    # get feature vectors
    if 'doc2vec' in feature_set_names:
        embedding_vectors = get_doc2vec_features(sentences, embedding)
    elif 'word2vec' in feature_set_names:
        embedding_vectors = get_word2vec_features(sentences, embedding)
    else:
        raise RuntimeError("Invalid config setting train:features=%s"
                           % feature_set_names)

    if 'bow' in feature_set_names:
        return True, get_mixed_features(sentences, embedding_vectors, y_labels)
    else:
        # matrix is dense -- drop NaNs
        return False, drop_nans(embedding_vectors, y_labels)
def json_field_iter(files, field=None):
    """Iterate over JSON-lines documents from *files*.

    Yields each whole document when *field* is None; otherwise yields the
    value stored under *field* in each document.
    """
    if field is None:
        for fname in files:
            yield from read_json_lines(fname)
    else:
        for fname in files:
            for doc in read_json_lines(fname):
                yield doc[field]
def doc_iter(args):
    """Yield the ``args.field`` value of every JSON-lines record in ``args.input``."""
    key = args.field
    for path in args.input:
        yield from (record[key] for record in read_json_lines(path))