Code example #1
    def handle(self, *args, **options):
        with hook_compressed(options['filename']) as fp:
            all_data = json.load(fp)
        with atomic():
            for info in all_data:
                person = Persona(**info)
                person.save()
Code example #2
    def _open(self, fp):
        if 'xml' in fp:
            return fileinput.hook_compressed(fp, 'r')

        if fp.endswith('.gz'):
            reader = codecs.getreader("utf-8")
            return reader(gzip.open(fp))
        return codecs.open(fp, encoding='utf-8', mode='r')
Code example #3
    def test_gz_with_encoding_fake(self):
        original_open = gzip.open
        gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string')
        try:
            result = fileinput.hook_compressed("test.gz", "3", encoding="utf-8")
        finally:
            gzip.open = original_open
        self.assertEqual(list(result), ['Ex-binary string'])
Code example #4
    def do_test_use_builtin_open(self, filename, mode):
        original_open = self.replace_builtin_open(self.fake_open)
        try:
            result = fileinput.hook_compressed(filename, mode)
        finally:
            self.replace_builtin_open(original_open)
        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation,
                         ((filename, mode), {}))
Code example #5
    def test_gz_ext_fake(self):
        original_open = gzip.open
        gzip.open = self.fake_open
        try:
            result = fileinput.hook_compressed('test.gz', 3)
        finally:
            gzip.open = original_open
        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation, (('test.gz', 3), {}))
Code example #6
    def test_bz2_ext_fake(self):
        original_open = bz2.BZ2File
        bz2.BZ2File = self.fake_open
        try:
            result = fileinput.hook_compressed('test.bz2', 4)
        finally:
            bz2.BZ2File = original_open
        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation, (('test.bz2', 4), {}))
Code example #7
File: test_fileinput.py  Project: LPRD/build_tools
    def do_test_use_builtin_open(self, filename, mode):
        original_open = self.replace_builtin_open(self.fake_open)
        try:
            result = fileinput.hook_compressed(filename, mode)
        finally:
            self.replace_builtin_open(original_open)

        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation,
                         ((filename, mode), {}))
Code example #8
File: test_fileinput.py  Project: LPRD/build_tools
    def test_gz_ext_fake(self):
        original_open = gzip.open
        gzip.open = self.fake_open
        try:
            result = fileinput.hook_compressed("test.gz", 3)
        finally:
            gzip.open = original_open

        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation, (("test.gz", 3), {}))
Code example #9
File: test_fileinput.py  Project: LPRD/build_tools
    def test_bz2_ext_fake(self):
        original_open = bz2.BZ2File
        bz2.BZ2File = self.fake_open
        try:
            result = fileinput.hook_compressed("test.bz2", 4)
        finally:
            bz2.BZ2File = original_open

        self.assertEqual(self.fake_open.invocation_count, 1)
        self.assertEqual(self.fake_open.last_invocation, (("test.bz2", 4), {}))
Code example #10
File: loggly.py  Project: rr204483/loggly
def read_lines_old(file, myvalidator):
        Lines=[]
        try :
                for line in fileinput.hook_compressed(file, "r"):
                        line_obj=Line()
                        if(line_obj.parse_line(file,line, myvalidator)):
                                Lines.append(line_obj)
                return Lines
        except IOError :
                raise Exception(file+": File is not found. Ignoring this file ")
Code example #11
File: loggly.py  Project: rr204483/loggly
def read_lines(file, myvalidator):
	Lines=[]
	try :
		prevvalue="%Y %b %d %H:%M:%S"
		prevdate=datetime.strptime("1970 Jan 01 00:00:00", prevvalue)
		for line in fileinput.hook_compressed(file, "r"):
			line_obj=Line()
			if (line_obj.isDateFound(line)):
				if(line_obj.parse_line(file,line, myvalidator)):
					prevdate=line_obj.linedatetime
					prevvalue=line_obj.datetimeformat
					Lines.append(line_obj)
			else :
				# for handling the lines without date, eg: smf files
				fname=os.path.split(file)[1]
				#print prevdate, prevvalue, line, fname
				line_obj.set(fname, prevdate, prevvalue, line[:-1].lstrip(), fake=True)
				#Lines[-1].text=Lines[-1].text+"\n"+line
				Lines.append(line_obj)

		return Lines
	except IOError :
		raise Exception(file+": File is not found. Ignoring this file ")
Code example #12
def supress_repeated(supress_lines):
    for line in fileinput.hook_compressed(supress_lines, 'r+'):
        if line.rstrip():
            print(line)
Code example #13
output_file = sys.argv[2]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(input_file):
    print 'input_file "' + input_file + '" does not exist'
    exit()

try:
    output_fid = open(output_file, 'w')
except:
    print 'Error opening output file ' + output_file
    exit()

if input_file[-3:] == '.gz':
    input_fid = fileinput.hook_compressed(input_file, 'r')
else:
    input_fid = open(input_file, 'r')

IDtoQUERY = {}
input_array = []
plates_to_remap = set()
max_plate_id = 0

for line in input_fid:
    line = line.split()
    input_array.append(line)
    query = line[0]
    plate = line[4]
    if int(plate) > max_plate_id:
        max_plate_id = int(plate)
Code example #14
if sys.argv.count('-h') + sys.argv.count('-help') + sys.argv.count(
        '--help') > 0:
    help()
    exit()

if sys.argv.count('-header') > 0:
    header()
    exit()

if len(sys.argv) < 2:
    print 'too few arguments (try "-h" for help)'
    exit()

input_file = sys.argv[1]
if input_file[-3:] == '.gz':
    input_fid = hook_compressed(input_file, 'r')
else:
    input_fid = open(input_file, 'r')

queries = set()
arrays = set()
counts = [0, 0, 0, 0]  # neg pos insig nan
labels = ['FG30', 'FG26', 'TS30', 'TS26', 'SCI']
exp_counts = {}
for i in range(len(labels)):
    exp_counts[labels[i]] = 0

input_fid.readline()  # toss header
for line in input_fid:
    line = line.strip().split('\t')
    queries.add(line[0])
Code example #15
    help()
    exit()

if len(sys.argv) < 2:
    print('too few arguments (try "-h" for help)')
    exit()

sga_file = sys.argv[1]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(sga_file):
    print('sga_file "' + sga_file + '" does not exist')
    exit()

sga_fid = fileinput.hook_compressed(sga_file, 'r')

# default to "raw" input files
QUERY_COL = 0
ARRAY_COL = 1
PLATE_COL = 2
UNIQE_COL = 4
BATCH_COL = 5
if 'raw' in sga_file:
   QUERY_COL = 0
   ARRAY_COL = 1
   PLATE_COL = 2
   UNIQE_COL = 4
   BATCH_COL = 5
elif 'release' in sga_file:
   QUERY_COL = 0
Code example #16
output_file = sys.argv[2]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(combined_data_file):
    print 'combined_data_file "' + combined_data_file + '" does not exist'
    exit()
try:
    output_fid = open(output_file, 'w')
except:
    print 'Error opening output file: ' + output_file
    exit()

## Step 1: Split each line and add _SETID to the first field
if combined_data_file[-3:] == '.gz':
    combined_data_fid = fileinput.hook_compressed(combined_data_file, 'r')
else:
    combined_data_fid = open(combined_data_file, 'r')

SEEN_QUERIES = set()
for line in combined_data_fid:
    line = line.split()
    query = line[SGA_QUERY_COL]
    set = line[SGA_SET_COL]
    # Add _set to query name
    queryset = query + '_' + set
    line[SGA_QUERY_COL] = queryset
    output_fid.write('\t'.join(line))
    output_fid.write('\n')

combined_data_fid.close()
Code example #17
File: allcat.py  Project: lynxman/scripts
#!/usr/bin/python
import fileinput
import sys
for file in sys.argv[1:]:
    fh = fileinput.hook_compressed(file, 'r')
    data = fh.read(32768)
    while data:
        sys.stdout.write(data)
        data = fh.read(32768)
Code example #18
   return

################ MAIN FUNCTION

if sys.argv.count('-h') + sys.argv.count('-help') + sys.argv.count('--help') > 0:
   help()
   sys.exit()

if len(sys.argv) != 3:
   print('Wrong number of arguments, try "-help"', file=sys.stderr)
   sys.exit()

SGAfile = sys.argv[1]
OtherData = sys.argv[2]

fid_1 = fileinput.hook_compressed(SGAfile, 'r')
fid_2 = fileinput.hook_compressed(OtherData, 'r')

max_plate = 0
max_batch = 0
for line in fid_1:
   if SGAfile[-3:] == '.gz':
      line = line.decode('utf-8').strip()
   else:
      line = line.strip()

   split_line = line.split('\t')
   plate = int(split_line[4])
   batch = int(split_line[5])
   if plate > max_plate:
      max_plate = plate
Code example #19
File: nodes.py  Project: xxoolm/Ryven
    def update_event(self, inp=-1):
        self.set_output_val(
            0, fileinput.hook_compressed(self.input(0), self.input(1)))
Code example #20
## Step 3: For each query, split the set ids into two groups
group1 = [int(x) - 1 for x in split_param.split(',')]
size1 = len(group1)
size2 = {}

setA = {}
for query in replicate_queries:
    setA[query] = [list(query_setids[query])[x] for x in group1]
    size2[query] = len(query_setids[query]) - size1

## Step 4: Iterate through the scorefile
# keep replicate queries, renaming them
# keep anything in keep_batches
# Result can be appended to a short set.

if big_data_file[-3:] == '.gz':
    big_data_fid = fileinput.hook_compressed(big_data_file, 'r')
else:
    big_data_fid = open(big_data_file, 'r')

for line in big_data_fid:
    line = line.strip().split('\t')
    if line[0] in replicate_queries:
        if line[3] in setA[line[0]]:
            line[0] = line[0] + '_A' + str(size1)
        else:
            line[0] = line[0] + '_B' + str(size2[line[0]])
        print('\t'.join(line))
    #elif line[5] in keep_batches:
    #print('\t'.join(line))
Code example #21
    if 'signal' in filename:
        np.save(
            save_path + 'clean_signal-dijet' + "_" + feature_type +
            batch_number, data)
    elif 'bg' in filename:
        np.save(
            save_path + 'clean_bg-dijet' + "_" + feature_type + batch_number,
            data)
    else:
        assert 1 == 0  # Files were not generated. There is a problem with the source filename


#load_path = './'
load_path = "/phys/groups/tev/scratch4/users/kaifulam/dguest/gjj-pheno/v1/"

#filename = "dijet-bg.txt.gz"
#filename = "all-signal.json"
#filename = 'ten_line_signal.json'
filename = 'one_line_signal.json'

filename = load_path + filename

#save_path = './saved_batches/'
save_path = "/phys/groups/tev/scratch4/users/kaifulam/dguest/gjj-pheno/v1/high_mid_low_and_covariance/numpy_data/batches_5000/"

if filename[
        -3:] == '.gz':  # Check if the file is compressed or not and open accordingly
    fid = fileinput.hook_compressed(filename, 'r')
else:
    fid = open(filename)

clean_and_merge_lines(fid, 1, filename, save_path)
Code example #22
if len(sys.argv) < 3:
    print 'too few arguments (try "-h" for help)'
    exit()

SGA_file = sys.argv[1]
batch_ids = sys.argv[2:]
#BATCH_file = sys.argv[2]
#batch_ids = set()
#b_fid = open(BATCH_file,'r')
#for line in b_fid:
#    batch_ids.add(line.strip())

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(SGA_file):
    print 'SGA_file "' + SGA_file + '" does not exist'
    exit()

if SGA_file[-3:] == '.gz':
    SGA_fid = fileinput.hook_compressed(SGA_file, 'r')
else:
    SGA_fid = open(SGA_file, 'r')

for line in SGA_fid:
    line = line.strip()
    parsed = line.split('\t')
    #if parsed[5] not in batch_ids:
    if parsed[5] in batch_ids:
        print(line)
Code example #23
#! /usr/bin/env python3
import fileinput

with fileinput.input() as f:
    for line in f:
        print(line, end='')

# ls | ./file_input.py
# __pycache__
# file_input.py
# file_input_multi_files.py
'''
The two following opening hooks are provided by this module:

fileinput.hook_compressed(filename, mode)
Transparently opens files compressed with gzip and bzip2 (recognized by the extensions '.gz' and '.bz2') using the gzip and bz2 modules. If the filename extension is not '.gz' or '.bz2', the file is opened normally (i.e., using open() without any decompression).

Usage example: fi = fileinput.FileInput(openhook=fileinput.hook_compressed)

fileinput.hook_encoded(encoding, errors=None)
Returns a hook which opens each file with open(), using the given encoding and errors to read the file.

Usage example: fi = fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8", "surrogateescape"))

Changed in version 3.6: Added the optional errors parameter.
'''
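As a quick illustration of the two hooks quoted above, here is a minimal sketch (not taken from any of the projects listed on this page) of how they plug into fileinput.FileInput. The file names 'logs.txt.gz' and 'notes.txt' are hypothetical placeholders.

import fileinput

# Transparently decompress .gz/.bz2 files while iterating line by line.
# Without an encoding argument, hook_compressed may yield bytes for
# compressed files and str for plain ones, so this sketch only counts lines.
total = 0
with fileinput.FileInput(files=('logs.txt.gz', 'notes.txt'),  # hypothetical paths
                         openhook=fileinput.hook_compressed) as fi:
    for line in fi:
        total += 1
print(total, 'lines read')

# Open every file as UTF-8 text regardless of extension, mapping
# undecodable bytes to surrogates instead of raising.
with fileinput.FileInput(files=('notes.txt',),  # hypothetical path
                         openhook=fileinput.hook_encoded('utf-8', 'surrogateescape')) as fi:
    for line in fi:
        print(line, end='')

Note the difference in how the hooks are passed: hook_compressed is handed over as-is and picks gzip, bz2, or plain open() by extension, while hook_encoded(...) is called first and returns a hook preconfigured with the chosen encoding and error handler.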