Python parse_from_file 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: session_parser

메소드/함수: parse_from_file

hotexamples.com에서의 예제들: 4

Python parse_from_file - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python session_parser.parse_from_file의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: times_skipped_2.py 프로젝트: varunagarwal538bly/personalized_search_challenge

import numpy as np

# Script setup: locate the project root, make the local `session_parser`
# module importable, open the training data, and allocate the accumulators.
# NOTE(review): `os` and `sys` are used here but their imports are not visible
# in this excerpt -- confirm they are imported earlier in the file.
this_file_path = os.path.realpath(__file__)  # absolute, symlink-resolved path of this file
# Three dirname() calls: the directory two levels above the one holding this file.
home_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_file_path)))

# Put <home_dir>/lib at the front of the module search path so the import
# below resolves to the project's own session_parser.
sys.path.insert(0, home_dir + "/lib")
import session_parser as sp

# For testing (smaller input files; uncomment one to use it instead)
#train_path = home_dir + '/data/train_head_10k'
#train_path = home_dir + '/data/train_head_million'
#train_path = home_dir + '/data/train_sample_10k'

# For real
train_path = home_dir + '/data/train'
# Presumably a generator yielding one session at a time -- confirm against
# session_parser.parse_from_file.
session_generator = sp.parse_from_file(train_path)

# Number of sessions processed so far.
session_count = 0

# Skips greater than or equal to this value will be aggregated
limit = 3
# List of 10 integer arrays, each of length limit + 1, all starting at zero.
# Stores the sums for each position for each 'number of skips'.
# NOTE(review): the 10 rows presumably correspond to 10 result positions -- confirm.
counts = [np.zeros(limit + 1, dtype=int) for i in range(10)]
# Same layout as `counts`; stores the corresponding lengths.
lengths = [np.zeros(limit + 1, dtype=int) for i in range(10)]

while True:
    try:
        # Print at every millionth session
        if session_count % (10**6) == 0:
            print "...reading the {0}th session".format(session_count)

예제 #2

파일 보기

파일: global_url_distribution.py 프로젝트: yksugi/personalized_search_challenge

        for key in dict:
            if key in findict:
                findict[key] += dict[key]
            else:
                findict[key] = dict[key]



# Script setup: locate the project root, make the local `session_parser`
# module importable, and open the sampled training data.
this_file_path = os.path.realpath(__file__) # absolute, symlink-resolved path of this file
# Two dirname() calls: the parent of the directory holding this file.
home_dir = os.path.dirname(os.path.dirname(this_file_path))

# Put <home_dir>/script at the front of the module search path so the import
# below resolves to the project's own session_parser.
sys.path.insert(0, home_dir + "/script")
import session_parser as sp

train_path = home_dir + '/data/train_sample'
# Iterator over sessions; it is advanced with .next() in the loop that follows.
session_generator = sp.parse_from_file(train_path)

# Number of sessions processed so far.
session_count = 0

while True:
    try:
        # Print at every millionth session
        if session_count % (10 ** 6) == 0:
            print "...reading the {0}th session".format(session_count)

        # next() raises the StopIteration exception when hitting the end
        session = session_generator.next()

        queryParse(session.queries)

        session_count += 1

예제 #3

파일 보기

# Runs in ~122 seconds for the entire test data.

import os
import sys
import pandas as pd

# Script setup: locate the project root, make the local `session_parser`
# module importable, load the precomputed means, and open the output file.
this_file_path = os.path.realpath(__file__)  # absolute, symlink-resolved path of this file
# Three dirname() calls: the directory two levels above the one holding this file.
home_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_file_path)))

# Put <home_dir>/lib at the front of the module search path so the import
# below resolves to the project's own session_parser.
sys.path.insert(0, home_dir + "/lib")
import session_parser as sp

# test_path = home_dir + '/data/test_head' # a small header file for testing
# print "WARNING, this script is using a header file, not the real file."
test_path = home_dir + '/data/test'  # for real
# Presumably a generator yielding one session at a time -- confirm against
# session_parser.parse_from_file.
session_generator = sp.parse_from_file(test_path)

# Read results for skipped and global versions
# Ignore the first 3 lines in CSV as they are comments.
# With header='infer', the first line after the skipped rows supplies the
# column names; only the column named like the file is kept from each CSV.
skipped_means = pd.read_csv(home_dir + '/data/results/skipped_means.csv',\
    sep=",", skipinitialspace=True, header='infer', skiprows=3).skipped_means
global_means = pd.read_csv(home_dir + '/data/results/global_means.csv',\
    sep=",", skipinitialspace=True, header='infer', skiprows=3).global_means

# File for writing our predictions
# Strategy 2 - Goal 1, with a bug.  We fixed a bug, so we should try running this
#  with the fixed algorithm again.
# NOTE(review): the handle is not closed within this excerpt -- confirm it is
# closed later in the script.
results = open(home_dir + '/data/prediction/s2_goal1_with_bug', 'w')
# CSV header row for the prediction file.
results.write("SessionID,URLID\n")

# Number of sessions processed so far.
session_count = 0

예제 #4

파일 보기

파일: s2_goal1.py 프로젝트: yksugi/personalized_search_challenge

# Runs in ~122 seconds for the entire test data.

import os
import sys
import pandas as pd

# Script setup: locate the project root, make the local `session_parser`
# module importable, load the precomputed means, and open the output file.
this_file_path = os.path.realpath(__file__) # absolute, symlink-resolved path of this file
# Three dirname() calls: the directory two levels above the one holding this file.
home_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_file_path)))

# Put <home_dir>/lib at the front of the module search path so the import
# below resolves to the project's own session_parser.
sys.path.insert(0, home_dir + "/lib")
import session_parser as sp

# test_path = home_dir + '/data/test_head' # a small header file for testing
# print "WARNING, this script is using a header file, not the real file."
test_path = home_dir + '/data/test' # for real
# Presumably a generator yielding one session at a time -- confirm against
# session_parser.parse_from_file.
session_generator = sp.parse_from_file(test_path)

# Read results for skipped and global versions
# Ignore the first 3 lines in CSV as they are comments.
# With header='infer', the first line after the skipped rows supplies the
# column names; only the column named like the file is kept from each CSV.
skipped_means = pd.read_csv(home_dir + '/data/results/skipped_means.csv',\
    sep=",", skipinitialspace=True, header='infer', skiprows=3).skipped_means
global_means = pd.read_csv(home_dir + '/data/results/global_means.csv',\
    sep=",", skipinitialspace=True, header='infer', skiprows=3).global_means

# File for writing our predictions
# Strategy 2 - Goal 1, with a bug.  We fixed a bug, so we should try running this
#  with the fixed algorithm again.
# NOTE(review): the handle is not closed within this excerpt -- confirm it is
# closed later in the script.
results = open(home_dir + '/data/prediction/s2_goal1_with_bug','w')
# CSV header row for the prediction file.
results.write("SessionID,URLID\n")

# Number of sessions processed so far.
session_count = 0