Code example #1
    def handle(self, *args, **options):
        with hook_compressed(options['filename']) as fp:
            all_data = json.load(fp)
        with atomic():
            for info in all_data:
                person = Persona(**info)
                person.save()
Code example #2
    def _open(self, fp):
        if 'xml' in fp:
            return fileinput.hook_compressed(fp, 'r')

        if fp.endswith('.gz'):
            reader = codecs.getreader("utf-8")
            return reader(gzip.open(fp))
        return codecs.open(fp, encoding='utf-8', mode='r')
Code example #3
    def test_gz_with_encoding_fake(self):
        original_open = gzip.open
        gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string')
        try:
            result = fileinput.hook_compressed("test.gz", "3", encoding="utf-8")
        finally:
            gzip.open = original_open
        self.assertEqual(list(result), ['Ex-binary string'])
Code example #4
    def do_test_use_builtin_open(self, filename, mode):
        original_open = self.replace_builtin_open(self.fake_open)
        try:
            result = fileinput.hook_compressed(filename, mode)
        finally:
            self.replace_builtin_open(original_open)
        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation,
                         ((filename, mode), {}))
Code example #5
    def test_gz_ext_fake(self):
        original_open = gzip.open
        gzip.open = self.fake_open
        try:
            result = fileinput.hook_compressed('test.gz', 3)
        finally:
            gzip.open = original_open
        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation, (('test.gz', 3), {}))
Code example #6
    def test_bz2_ext_fake(self):
        original_open = bz2.BZ2File
        bz2.BZ2File = self.fake_open
        try:
            result = fileinput.hook_compressed('test.bz2', 4)
        finally:
            bz2.BZ2File = original_open
        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation, (('test.bz2', 4), {}))
Code example #7
File: test_fileinput.py  Project: LPRD/build_tools
    def do_test_use_builtin_open(self, filename, mode):
        original_open = self.replace_builtin_open(self.fake_open)
        try:
            result = fileinput.hook_compressed(filename, mode)
        finally:
            self.replace_builtin_open(original_open)

        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation,
                         ((filename, mode), {}))
Code example #8
File: test_fileinput.py  Project: LPRD/build_tools
    def test_gz_ext_fake(self):
        original_open = gzip.open
        gzip.open = self.fake_open
        try:
            result = fileinput.hook_compressed("test.gz", 3)
        finally:
            gzip.open = original_open

        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation, (("test.gz", 3), {}))
Code example #9
File: test_fileinput.py  Project: LPRD/build_tools
    def test_bz2_ext_fake(self):
        original_open = bz2.BZ2File
        bz2.BZ2File = self.fake_open
        try:
            result = fileinput.hook_compressed("test.bz2", 4)
        finally:
            bz2.BZ2File = original_open

        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation, (("test.bz2", 4), {}))
Code example #10
File: loggly.py  Project: rr204483/loggly
def read_lines_old(file, myvalidator):
        Lines=[]
        try :
                for line in fileinput.hook_compressed(file, "r"):
                        line_obj=Line()
                        if(line_obj.parse_line(file,line, myvalidator)):
                                Lines.append(line_obj)
                return Lines
        except IOError :
                raise Exception(file+": File is not found. Ignoring this file ")
Code example #11
File: loggly.py  Project: rr204483/loggly
def read_lines(file, myvalidator):
	Lines=[]
	try :
		prevvalue="%Y %b %d %H:%M:%S"
		prevdate=datetime.strptime("1970 Jan 01 00:00:00", prevvalue)
		for line in fileinput.hook_compressed(file, "r"):
			line_obj=Line()
			if (line_obj.isDateFound(line)):
				if(line_obj.parse_line(file,line, myvalidator)):
					prevdate=line_obj.linedatetime
					prevvalue=line_obj.datetimeformat
					Lines.append(line_obj)
			else :
				# for handling the lines without date, eg: smf files
				fname=os.path.split(file)[1]
				#print prevdate, prevvalue, line, fname
				line_obj.set(fname, prevdate, prevvalue, line[:-1].lstrip(), fake=True)
				#Lines[-1].text=Lines[-1].text+"\n"+line
				Lines.append(line_obj)

		return Lines
	except IOError :
		raise Exception(file+": File is not found. Ignoring this file ")
Code example #12
def supress_repeated(supress_lines):
    for line in fileinput.hook_compressed(supress_lines, 'r+'):
        if line.rstrip():
            print(line)
Code example #13
output_file = sys.argv[2]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(input_file):
    print 'input_file "' + input_file + '" does not exist'
    exit()

try:
    output_fid = open(output_file, 'w')
except:
    print 'Error opening output file ' + output_file
    exit()

if input_file[-3:] == '.gz':
    input_fid = fileinput.hook_compressed(input_file, 'r')
else:
    input_fid = open(input_file, 'r')

IDtoQUERY = {}
input_array = []
plates_to_remap = set()
max_plate_id = 0

for line in input_fid:
    line = line.split()
    input_array.append(line)
    query = line[0]
    plate = line[4]
    if int(plate) > max_plate_id:
        max_plate_id = int(plate)
Code example #14
if sys.argv.count('-h') + sys.argv.count('-help') + sys.argv.count(
        '--help') > 0:
    help()
    exit()

if sys.argv.count('-header') > 0:
    header()
    exit()

if len(sys.argv) < 2:
    print 'too few arguments (try "-h" for help)'
    exit()

input_file = sys.argv[1]
if input_file[-3:] == '.gz':
    input_fid = hook_compressed(input_file, 'r')
else:
    input_fid = open(input_file, 'r')

queries = set()
arrays = set()
counts = [0, 0, 0, 0]  # neg pos insig nan
labels = ['FG30', 'FG26', 'TS30', 'TS26', 'SCI']
exp_counts = {}
for i in range(len(labels)):
    exp_counts[labels[i]] = 0

input_fid.readline()  # toss header
for line in input_fid:
    line = line.strip().split('\t')
    queries.add(line[0])
Code example #15
    help()
    exit()

if len(sys.argv) < 2:
    print('too few arguments (try "-h" for help)')
    exit()

sga_file = sys.argv[1]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(sga_file):
    print('sga_file "' + sga_file + '" does not exist')
    exit()

sga_fid = fileinput.hook_compressed(sga_file, 'r')

# default to "raw" input files
QUERY_COL = 0
ARRAY_COL = 1
PLATE_COL = 2
UNIQE_COL = 4
BATCH_COL = 5
if 'raw' in sga_file:
   QUERY_COL = 0
   ARRAY_COL = 1
   PLATE_COL = 2
   UNIQE_COL = 4
   BATCH_COL = 5
elif 'release' in sga_file:
   QUERY_COL = 0
Code example #16
output_file = sys.argv[2]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(combined_data_file):
    print 'combined_data_file "' + combined_data_file + '" does not exist'
    exit()
try:
    output_fid = open(output_file, 'w')
except:
    print 'Error opening output file: ' + output_file
    exit()

## Step 1: Split each line and add _SETID to the first field
if combined_data_file[-3:] == '.gz':
    combined_data_fid = fileinput.hook_compressed(combined_data_file, 'r')
else:
    combined_data_fid = open(combined_data_file, 'r')

SEEN_QUERIES = set()
for line in combined_data_fid:
    line = line.split()
    query = line[SGA_QUERY_COL]
    set = line[SGA_SET_COL]
    # Add _set to query name
    queryset = query + '_' + set
    line[SGA_QUERY_COL] = queryset
    output_fid.write('\t'.join(line))
    output_fid.write('\n')

combined_data_fid.close()
Code example #17
File: allcat.py  Project: lynxman/scripts
#!/usr/bin/python
import fileinput
import sys
for file in sys.argv[1:]:
    fh = fileinput.hook_compressed(file, 'r')
    data = fh.read(32768)
    while data:
        sys.stdout.write(data)
        data = fh.read(32768)
Code example #18
   return

################ MAIN FUNCTION

if sys.argv.count('-h') + sys.argv.count('-help') + sys.argv.count('--help') > 0:
   help()
   sys.exit()

if len(sys.argv) != 3:
   print('Wrong number of arguments, try "-help"', file=sys.stderr)
   sys.exit()

SGAfile = sys.argv[1]
OtherData = sys.argv[2]

fid_1 = fileinput.hook_compressed(SGAfile, 'r')
fid_2 = fileinput.hook_compressed(OtherData, 'r')

max_plate = 0
max_batch = 0
for line in fid_1:
   if SGAfile[-3:] == '.gz':
      line = line.decode('utf-8').strip()
   else:
      line = line.strip()

   split_line = line.split('\t')
   plate = int(split_line[4])
   batch = int(split_line[5])
   if plate > max_plate:
      max_plate = plate
Code example #19
File: nodes.py  Project: xxoolm/Ryven
    def update_event(self, inp=-1):
        self.set_output_val(
            0, fileinput.hook_compressed(self.input(0), self.input(1)))
Code example #20
## Step 3: For each query, split the set ids into two groups
group1 = [int(x) - 1 for x in split_param.split(',')]
size1 = len(group1)
size2 = {}

setA = {}
for query in replicate_queries:
    setA[query] = [list(query_setids[query])[x] for x in group1]
    size2[query] = len(query_setids[query]) - size1

## Step 4: Iterate through the scorefile
# keep replicate queries, renaming them
# keep anything in keep_batches
# Result can be appended to a short set.

if big_data_file[-3:] == '.gz':
    big_data_fid = fileinput.hook_compressed(big_data_file, 'r')
else:
    big_data_fid = open(big_data_file, 'r')

for line in big_data_fid:
    line = line.strip().split('\t')
    if line[0] in replicate_queries:
        if line[3] in setA[line[0]]:
            line[0] = line[0] + '_A' + str(size1)
        else:
            line[0] = line[0] + '_B' + str(size2[line[0]])
        print('\t'.join(line))
    #elif line[5] in keep_batches:
    #print('\t'.join(line))
Code example #21
    if 'signal' in filename:
        np.save(
            save_path + 'clean_signal-dijet' + "_" + feature_type +
            batch_number, data)
    elif 'bg' in filename:
        np.save(
            save_path + 'clean_bg-dijet' + "_" + feature_type + batch_number,
            data)
    else:
        assert 1 == 0  # Files were not generated. There is a problem with the source filename


#load_path = './'
load_path = "/phys/groups/tev/scratch4/users/kaifulam/dguest/gjj-pheno/v1/"

#filename = "dijet-bg.txt.gz"
#filename = "all-signal.json"
#filename = 'ten_line_signal.json'
filename = 'one_line_signal.json'

filename = load_path + filename

#save_path = './saved_batches/'
save_path = "/phys/groups/tev/scratch4/users/kaifulam/dguest/gjj-pheno/v1/high_mid_low_and_covariance/numpy_data/batches_5000/"

if filename[
        -3:] == '.gz':  # Check if the file is compressed or not and open accordingly
    fid = fileinput.hook_compressed(filename, 'r')
else:
    fid = open(filename)

clean_and_merge_lines(fid, 1, filename, save_path)
Code example #22
if len(sys.argv) < 3:
    print 'too few arguments (try "-h" for help)'
    exit()

SGA_file = sys.argv[1]
batch_ids = sys.argv[2:]
#BATCH_file = sys.argv[2]
#batch_ids = set()
#b_fid = open(BATCH_file,'r')
#for line in b_fid:
#    batch_ids.add(line.strip())

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(SGA_file):
    print 'SGA_file "' + SGA_file + '" does not exist'
    exit()

if SGA_file[-3:] == '.gz':
    SGA_fid = fileinput.hook_compressed(SGA_file, 'r')
else:
    SGA_fid = open(SGA_file, 'r')

for line in SGA_fid:
    line = line.strip()
    parsed = line.split('\t')
    #if parsed[5] not in batch_ids:
    if parsed[5] in batch_ids:
        print(line)
Code example #23
#! /usr/bin/env python3
import fileinput

with fileinput.input() as f:
    for line in f:
        print(line, end='')

# ls | ./file_input.py
# __pycache__
# file_input.py
# file_input_multi_files.py
'''
The two following opening hooks are provided by this module:

fileinput.hook_compressed(filename, mode)
Transparently opens files compressed with gzip and bzip2 (recognized by the extensions '.gz' and '.bz2') using the gzip and bz2 modules. If the filename extension is not '.gz' or '.bz2', the file is opened normally (i.e., using open() without any decompression).

Usage example: fi = fileinput.FileInput(openhook=fileinput.hook_compressed)

fileinput.hook_encoded(encoding, errors=None)
Returns a hook which opens each file with open(), using the given encoding and errors to read the file.

Usage example: fi = fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8", "surrogateescape"))

Changed in version 3.6: Added the optional errors parameter.
'''
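As a quick illustration of the two hooks quoted above, here is a minimal sketch (not taken from any of the projects listed on this page) of how they plug into fileinput.FileInput. The file names 'logs.txt.gz' and 'notes.txt' are hypothetical placeholders.

import fileinput

# Transparently decompress .gz/.bz2 files while iterating line by line.
# Without an encoding argument, hook_compressed may yield bytes for
# compressed files and str for plain ones, so this sketch only counts lines.
total = 0
with fileinput.FileInput(files=('logs.txt.gz', 'notes.txt'),  # hypothetical paths
                         openhook=fileinput.hook_compressed) as fi:
    for line in fi:
        total += 1
print(total, 'lines read')

# Open every file as UTF-8 text regardless of extension, mapping
# undecodable bytes to surrogates instead of raising.
with fileinput.FileInput(files=('notes.txt',),  # hypothetical path
                         openhook=fileinput.hook_encoded('utf-8', 'surrogateescape')) as fi:
    for line in fi:
        print(line, end='')

Note the difference in how the hooks are passed: hook_compressed is handed over as-is and picks gzip, bz2, or plain open() by extension, while hook_encoded(...) is called first and returns a hook preconfigured with the chosen encoding and error handler.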