Example #1
def main(path2template, resultsPath, runs):
    import gzip
    tmpLP = metabolism.Metabolism(util.ImportCplex(path2template))
    tmpLP.setReactionObjectiveMinimizeRest('R("R_Ec_biomass_iAF1260_core_59p81M")')
    include = ('R("Mhb_Transp")',
             'R("Mna1b_Transp")',
             'R("Mkb_Transp")',
             'R("Mca2b_Transp")',
             'R("Mcu2b_Transp")',
             'R("Mmg2b_Transp")',
             'R("Mzn2b_Transp")',
             'R("Mmobdb_Transp")',
             'R("Mfe2b_Transp")',
             'R("Mfe3b_Transp")',
             'R("Mcobalt2b_Transp")',
             'R("Mmn2b_Transp")',
             'R("Mclb_Transp")')
    lp = Almaas(copy.copy(tmpLP), alwaysInc=include)
    print lp.lp
    f = lp.generateFluxdist()
    for item in range(runs):
        f = lp.generateFluxdist()
        stringDump = dict2tsv(lp.currDict) + "\n" + f.tsv()
        print stringDump
        path = resultsPath + "iAf1260_fluxDist_" + str(item) + ".tsv.gz"
        lp.lp.initialize()
        with gzip.open(path, 'w') as out:  # close explicitly so the gzip trailer gets written
            out.write(stringDump)
Example #2
def test_gzip_simple():
    xdense = np.zeros((20,20))
    xdense[2,3] = 2.3
    xdense[4,5] = 4.5
    x = SP.csc_matrix(xdense)

    name = 'gzip_test'
    expected = {'x':x}
    format = '4'

    tmpdir = mkdtemp()
    try:
        fname = pjoin(tmpdir,name)
        mat_stream = gzip.open(fname,mode='wb')
        savemat(mat_stream, expected, format=format)
        mat_stream.close()

        mat_stream = gzip.open(fname,mode='rb')
        actual = loadmat(mat_stream, struct_as_record=True)
        mat_stream.close()
    finally:
        shutil.rmtree(tmpdir)

    assert_array_almost_equal(actual['x'].todense(),
                              expected['x'].todense(),
                              err_msg=repr(actual))
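A side note on the pattern above: savemat and loadmat accept any file-like object, so the gzip streams can also be managed with context managers. A minimal sketch (the helper name and path handling are illustrative, assuming scipy.io is available):

import gzip
from scipy.io import savemat, loadmat

def roundtrip_through_gzip(path, arrays):
    # savemat/loadmat work on file-like objects, including gzip streams
    with gzip.open(path, mode='wb') as stream:
        savemat(stream, arrays, format='4')
    with gzip.open(path, mode='rb') as stream:
        return loadmat(stream, struct_as_record=True)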
Example #3
def sam2mat_main(args):
    region_pattern = r'^[^:]+(?::\d+-\d+)?(?:,[^:]+(?::\d+-\d+)?)?$'
    if args.region is not None and re.search(region_pattern, args.region):
        regions = args.region
    elif args.reglist is not None:
        with open(args.reglist) as f:
            regions = [line.rstrip() for line in f]
    else:
        regions = None

    if args.insam is None:
        sam_fh = sys.stdin
    else:
        sam_fh = open(args.insam, 'r')

    bdata = BinnedData(args.fai, regions=regions, resolution=args.resolution)
    bdata.read_sam(sam_fh)
    sam_fh.close()

    if args.clean:
        bdata.clean()
    if args.ice:
        bdata.iterative_correction()

    margins = bdata.dat.sum(axis=0)
    #print(margins)
    #sys.exit()

    try:
        os.makedirs(args.outdir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise(e)
    bin_outfile = os.path.join(args.outdir, 'bins.txt.gz')
    contact_outfile = os.path.join(args.outdir, 'contacts.txt.gz')
    matrix_outfile = os.path.join(args.outdir, 'matrix.txt.gz')
    bin_f = gzip.open(bin_outfile, 'wb')
    contact_f = gzip.open(contact_outfile, 'wb')
    matrix_f = gzip.open(matrix_outfile, 'wb')

    for i,chrom1,b1 in bdata.iter_bins():
        bin_mid1 = (b1[0]+b1[1])/2
        if ma.is_masked(margins[i]):
            margin = 0
        else:
            margin = int(margins[i])
        print('{}\t{}\t{}\t{}\t{}'.format(chrom1,0,bin_mid1,margin,int(margin>0)), file=bin_f)
        if bdata.cleaned:
            print('\t'.join(bdata.dat.data[i].astype(str)), file=matrix_f)
        else:
            print('\t'.join(bdata.dat[i].astype(str)), file=matrix_f)
        for j,chrom2,b2 in bdata.iter_bins():
            bin_mid2 = (b2[0]+b2[1])/2
            contact = bdata.dat[i,j]
            if j>i and not ma.is_masked(contact) and contact > 0:
                print('{}\t{}\t{}\t{}\t{}'.format(chrom1,bin_mid1,chrom2,bin_mid2,int(contact)), file=contact_f)

    bin_f.close()
    contact_f.close()
    matrix_f.close()
Example #4
def par_fmt(in_f, out_f):
    with gzip.open(out_f, 'wb') as out:
        for line in gzip.open(in_f):
            (p1, p2, prob) = line.strip().split(' ||| ')
            out.write('{}\n'.format(prob))
            out.write('{}\n'.format(p1))
            out.write('{}\n'.format(p2))
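Under Python 3, gzip.open defaults to binary streams, so writing str as above raises a TypeError. A text-mode sketch of the same reformatting step (the ' ||| ' separator and the prob/p1/p2 output order are taken from the example above; the function name is illustrative):

import gzip

def par_fmt_text(in_f, out_f):
    # 'rt'/'wt' wrap the gzip streams in text mode, so plain str round-trips
    with gzip.open(out_f, 'wt') as out, gzip.open(in_f, 'rt') as src:
        for line in src:
            p1, p2, prob = line.strip().split(' ||| ')
            out.write('{}\n{}\n{}\n'.format(prob, p1, p2))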
Example #5
  def run(self):
    self.emit("elasticcurl begin")

    self.infile  = None if self.inurl  != -1 else (gzip.open( self.args.input     ) if  self.args.input.endswith("gz") else open( self.args.input     ))
    self.outfile = None if self.outurl != -1 else (gzip.open(self.args.output, 'w') if self.args.output.endswith("gz") else open(self.args.output, 'w'))

    if self.inurl != -1 and self.args.scan: # if we're reading from elasticsearch, initiate scan mode
      cmd = "curl -s -XGET '" + self.args.input + "/_search?search_type=scan&scroll=10m&size=" + str(self.args.limit) + "' -d '{ \"query\" : { \"match_all\" : {} } } '";
      result = json.loads(subprocess.check_output(cmd, shell=True))
      self.scroll_id = result['_scroll_id']

    itemsin  = 0
    itemsout = 0
    while True:
      offset = itemsin * self.args.jobs + self.args.limit * self.args.id
      itemsread = self.get_items(self.args.limit, offset)
      if itemsread == 0: break
      itemsin += itemsread
      self.emit("Read " + str(itemsin) + " items total")
      itemswrote = self.put_items()
      itemsout += itemswrote
      self.emit("Wrote " + str(itemsout) + " items total")

    if  self.inurl == -1:  self.infile.close()
    if self.outurl == -1: self.outfile.close()

    self.emit("elasticcurl end")
Example #6
    def stream_for_day(self, day):
        cache_filename = self.cache_filename_base(day) + '.json.gz'

        if os.path.isfile(cache_filename):
            log.info('found stream in cache: %r on %s', self.keyword, day)

            with gzip.open(cache_filename, 'rb') as f:
                for mention in json.load(f):
                    yield mention

        else:
            cache_file_dirname = os.path.dirname(cache_filename)
            if not os.path.isdir(cache_file_dirname):
                os.makedirs(cache_file_dirname)

            tmp_cache_filename = (cache_filename +
                                  '.%05d.tmp' % randint(0, 10000))
            with gzip.open(tmp_cache_filename, 'wb') as f:
                f.write('[')
                first = True

                stream = super(CachingMentionCounter, self).stream_for_day(day)
                for mention in stream:
                    if not first:
                        f.write(', ')
                    json.dump(mention, f)
                    yield mention

                f.write(']')

            os.rename(tmp_cache_filename, cache_filename)

            log.info('cached stream for %r on %s', self.keyword, day)
Example #7
def _get_json_data(eid=None, fpath=None):
    """
    Returns the JSON data corresponding to the game represented by eid.

    If the JSON data is already on disk, it is read, decompressed and returned.

    Otherwise, the JSON data is downloaded from the NFL web site. If the data
    doesn't exist yet or there was an error, _get_json_data returns None.

    If eid is None, then the JSON data is read from the file at fpath.
    """
    assert eid is not None or fpath is not None

    if fpath is not None:
        return gzip.open(fpath, 'rt').read()

    fpath = _jsonf % eid
    if os.access(fpath, os.R_OK):
        return gzip.open(fpath, 'rt').read()
    try:
        url = _json_base_url % (eid, eid)
        response = requests.get(url)
        response.raise_for_status()
        return response.text
    except requests.exceptions.HTTPError:
        pass
    except socket.timeout:
        pass
    return None
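The two gzip.open(fpath, 'rt').read() calls above leave closing the handles to the garbage collector. A with-based sketch of the cache read (same fpath semantics; the helper name is illustrative):

import gzip

def _read_cached_json(fpath):
    # 'rt' decompresses and decodes to str in one step; the handle is closed on exit
    with gzip.open(fpath, 'rt') as f:
        return f.read()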
Example #8
def convert_SRC_counter_output(read_offsets, SRC_counter_output_file_name, read_bank_queries_file_name, min_avg_coverage_threshold, max_avg_coverage_threshold):
    #OPEN SRC OUTPUT
    if "gz" in SRC_counter_output_file_name:
        srcfile=gzip.open(SRC_counter_output_file_name,"r")
    else: 
        srcfile=open(SRC_counter_output_file_name,"r")
    
    #OPEN BANK OUTPUT
    #We open two streams, one of them (bankfile) is the 'query' from SRC counter (31: in the following example). This stream has less random accesses
    if "gz" in read_bank_queries_file_name:
        bankfile=gzip.open(read_bank_queries_file_name,"r")
    else: 
        bankfile=open(read_bank_queries_file_name,"r")
        
    
    
    #0 3.614286 4 2 5
    #id mean median min max

    
    for line in srcfile.readlines():
        if line[0]=='#': #header
            continue
        line=line.rstrip()
        
        avg_coverage=float(line.split()[1])
        if avg_coverage>=min_avg_coverage_threshold and avg_coverage<=max_avg_coverage_threshold:
            query_read_id=int(line.split()[0])
            print get_read(bankfile,read_offsets[query_read_id],"cov"+line[line.index(" "):].replace(' ', '_')),
    srcfile.close()
    bankfile.close()
Example #9
    def setUp(self):
        self.tab_file_name = "__gwasutil_wheader__.txt"

        with gzip.open("%s.gz" % (self.tab_file_name), 'w') as gzfile:
            with open(self.tab_file_name, 'w') as file:
                header_line = "%s\n" % ("\t".join(GwasEntry.HEADER_COMPS))
                file.write(header_line)
                gzfile.write(header_line)

                self.gwas_entries = []
                for i in range(1, 10):
                    self.gwas_entries.append(GwasEntry('1', 1000 * i,
                                "rs%d " % (100000 + numpy.random.randn())))
                    line = "%s\n" % (self.gwas_entries[-1].to_str("\t"))
                    file.write(line)
                    gzfile.write(line)

        self.comma_file_name = "__gwasutil_wheader_c__.txt"

        with gzip.open("%s.gz" % (self.comma_file_name), 'w') as gzfile:
            with open(self.comma_file_name, 'w') as file:
                header_line = "%s\n" % (",".join(GwasEntry.HEADER_COMPS))
                file.write(header_line)
                gzfile.write(header_line)

                self.gwas_entries = []
                for i in range(1, 10):
                    self.gwas_entries.append(GwasEntry('1', 1000 * i,
                                                   "rs%d " % (100000 + numpy.random.randn())))
                    line = "%s\n" % (self.gwas_entries[-1].to_str(","))
                    file.write(line)
                    gzfile.write(line)
Example #10
def get_data(series_id, platform_id, impute = False):
    matrixFilename = get_matrix_filename(series_id, platform_id)
    # setup data for specific platform
    for attempt in (0, 1):
        try:
            headerRows = __getMatrixNumHeaderLines(gzip.open(matrixFilename))
            na_values = ["null", "NA", "NaN", "N/A", "na", "n/a", ""]
            data = pd.io.parsers.read_table(gzip.open(matrixFilename),
                                            skiprows=headerRows,
                                            index_col=["ID_REF"],
                                            na_values=na_values,
                                            skipfooter=1,
                                            engine='python')
            break  # loaded successfully; no second attempt needed
        except IOError as e:
            # In case we have corrupt file
            print "Failed loading %s: %s" % (matrixFilename, e)
            os.remove(matrixFilename)
            if attempt:
                raise
            matrixFilename = get_matrix_filename(series_id, platform_id)
    data = clean_data(data) #drop samples
    if len(data.columns) == 1:
        data = data.dropna()
    elif impute:
        data = impute_data(data)
    data = log_data(data) #logc

    data.index = data.index.astype(str)
    data.index.name = "probe"
    data.columns.name = 'gsm_name'
    for column in data.columns:
        data[column] = data[column].astype(np.float64)

    # data.to_csv("float64.data.csv")
    return data
Example #11
def getData(imagePath, labelPath):

    imageFile, labelFile = gzip.open(os.path.join(".", imagePath), 'rb'), gzip.open(os.path.join(".", labelPath), 'rb')

    iMagic, iSize, rows, cols = struct.unpack('>IIII', imageFile.read(16))
    lMagic, lSize = struct.unpack('>II', labelFile.read(8))

    x = zeros((lSize, rows, cols), dtype=uint8)
    y = zeros((lSize, 1), dtype=uint8)
    count = 0

    startTime = time()

    for i in range(lSize):
        for row in range(rows):
            for col in range(cols):
                x[i][row][col] = struct.unpack(">B", imageFile.read(1))[0]

        y[i] = struct.unpack(">B", labelFile.read(1))[0]
        count = count + 1
        if count % 101 == 0:
            stdout.write("Image: %d/%d. Time Elapsed: %ds  \r" % (i, lSize, time() - startTime))
            stdout.flush()
        #if count > 600:
#            break
    stdout.write("\n")

    return (x, y)
Example #12
def main():
    args = options()
    # pdb.set_trace()
    if os.path.splitext(args.file_in)[1] == ".gz":
        f_iter = FastqGeneralIterator(gzip.open(args.file_in, "rU"))
    else:
        f_iter = FastqGeneralIterator(open(args.file_in, "rU"))
    if os.path.splitext(args.file_r1)[1] != ".gz" and os.path.splitext(args.file_r2)[1] != ".gz":
        args.file_r1 += ".gz"
        args.file_r2 += ".gz"
    r1_handle = gzip.open(args.file_r1, "wb")
    r2_handle = gzip.open(args.file_r2, "wb")
    count_r1 = 0
    count_r2 = 0
    for(f_id, f_seq, f_q) in f_iter:
        dic = {"f_id":f_id, "f_seq":f_seq, "f_q":f_q}
        if f_id.endswith("/1"):
            r1_handle.write("@{f_id}\n{f_seq}\n+\n{f_q}\n".format(**dic))
            count_r1 += 1
        elif f_id.endswith("/2"):
            r2_handle.write("@{f_id}\n{f_seq}\n+\n{f_q}\n".format(**dic))
            count_r2 += 1
    r1_handle.close()
    r2_handle.close()
    print("{r1_records} records written to {r1_handle}".format(r1_records=count_r1, r1_handle=args.file_r1))
    print("{r2_records} records written to {r2_handle}".format(r2_records=count_r2, r2_handle=args.file_r2))
Example #13
def collate_tmps(args):
    """ collate temp files back into 1 sample """
    ## split args
    data, name = args

    ## nproc len list of chunks
    combs = glob.glob(os.path.join(
                      data.dirs.fastqs, "tmp_"+name)+"_R1_*.gz")
    combs.sort(key=lambda x: int(x.split("_")[-1].replace(".gz", "")))  # sort numerically by chunk index

    ## one outfile to write to
    handle_r1 = os.path.join(data.dirs.fastqs, name+"_R1_.fastq.gz")
    with gzip.open(handle_r1, 'wb') as out:
        for fname in combs:
            with gzip.open(fname) as infile:
                out.write(infile.read())
    if "pair" in data.paramsdict["datatype"]:
        ## nproc len list of chunks
        combs = glob.glob(os.path.join(
                          data.dirs.fastqs, "tmp_"+name)+"_R2_*.gz")
        combs.sort()                        
        ## one outfile to write to
        handle_r2 = os.path.join(data.dirs.fastqs, name+"_R2_.fastq.gz")
        with gzip.open(handle_r2, 'wb') as out:
            for fname in combs:
                with gzip.open(fname) as infile:
                    out.write(infile.read())
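Because a gzip file may consist of several concatenated members, the same collation can also skip the decompress/recompress cycle by appending the raw compressed chunks. A hedged sketch (function name and paths are illustrative, not part of the example above):

import shutil

def collate_gz_members(chunk_paths, out_path):
    # concatenated gzip members still form one valid gzip stream
    with open(out_path, 'wb') as out:
        for path in chunk_paths:
            with open(path, 'rb') as chunk:
                shutil.copyfileobj(chunk, out)
    # gzip.open(out_path) will later iterate over the combined content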
Example #14
def count_word_alignments(parallelFile, alignmentFile, lang1WordVectors, lang2WordVectors):
    
    wordAlignDict = {}
    lineNum = 1
    for pLine, aLine in zip(gzip.open(parallelFile, 'r'),gzip.open(alignmentFile, 'r')):
        l1, l2 = pLine.lower().strip().split(' ||| ')
        l1Words, l2Words = (l1.split(), l2.split())
        
        for wordIndexPair in aLine.strip().split():
            i, j = wordIndexPair.split('-')
            i, j = (int(i), int(j))
            ''' count alignment only if both words have word vectors '''
            if l1Words[i] in lang1WordVectors and l2Words[j] in lang2WordVectors:
                if l2Words[j] in wordAlignDict:
                    if l1Words[i] in wordAlignDict[l2Words[j]]:
                        wordAlignDict[l2Words[j]][l1Words[i]] += 1
                    else:
                        wordAlignDict[l2Words[j]][l1Words[i]] = 1
                else:
                    wordAlignDict[l2Words[j]] = {l1Words[i]: 1}
        
        if lineNum%10000 == 0: sys.stderr.write(str(lineNum)+' ')
        lineNum += 1

    sys.stderr.write(str(len(wordAlignDict))+"\n")
    return wordAlignDict
Example #15
def events(timefrom, timeto=None):
    timefrom, timeto = resolve_time(timefrom, timeto)
    hour = timedelta(0, 3600)
    currtime = timefrom
    while currtime < timeto:
        jsonname = currtime.date().isoformat()+'-%d.json.gz'%(currtime.hour)
        jsonpath = cache_dir + '/' + jsonname
        jsonurl  = 'http://data.githubarchive.org/' + jsonname
        if not os.path.isfile(jsonpath):
            if not os.path.isdir(cache_dir):
                os.mkdir(cache_dir)
            rsp = requests.get(jsonurl)
            if rsp.status_code == 200:
                with open(jsonpath, 'wb') as fp:
                    fp.write(rsp.content)
            else:
                with gzip.open(jsonpath, 'wb') as fp:
                    fp.write(b'')  # leave a valid, empty placeholder archive
        with gzip.open(jsonpath, 'rb') as fp:
            jsontxts = fp.read().decode('utf-8', errors='ignore').splitlines()
        for j in map(json.loads, filter(valid_json, jsontxts)):
            t = parse_iso_time(j['created_at'])
            if t < timefrom or t > timeto: continue
            yield j
        currtime += hour
Example #16
def unmerge(combined_fname, out_template, gz=False):
	outs = []
	if gz:
		outs.append(gzip.open('%s.1.fastq.gz' % out_template, 'w'))
	else:
		outs.append(open('%s.1.fastq' % out_template, 'w'))

	outidx = 1

	last_read = None
	fq = FASTQ(combined_fname)
	for read in fq.fetch():
		if last_read and last_read.name == read.name:
			outidx += 1
			if len(outs) < outidx:
				if gz:
					outs.append(gzip.open('%s.%s.fastq.gz' % (out_template, outidx), 'w'))
				else:
					outs.append(open('%s.%s.fastq' % (out_template, outidx), 'w'))
			read.write(outs[outidx - 1])
		else:
			outidx = 1
			read.write(outs[0])

		last_read = read

	fq.close()
	for out in outs:
		out.close()
Example #17
	def test_rotate_twice(self):
		_rotate_files(times=2)

		for filename in FILES_TO_ROTATE:
			self.assertEqual(open(filename).read(), '')
			self.assertEqual(gzip.open(filename + '.1').read(), '')
			self.assertEqual(gzip.open(filename + '.2').read(), FILE_CONTENT)
Example #18
 def test_rotate_NE_RTVsPitsa(self):
     """
     Test horizontal component rotation against PITSA.
     """
     # load test files
     # no with due to py 2.6
     f = gzip.open(os.path.join(self.path, 'rjob_20051006_n.gz'))
     data_n = np.loadtxt(f)
     f.close()
     f = gzip.open(os.path.join(self.path, 'rjob_20051006_e.gz'))
     data_e = np.loadtxt(f)
     f.close()
     #test different angles, one from each sector
     for angle in [30, 115, 185, 305]:
         # rotate traces
         datcorr_r, datcorr_t = rotate_NE_RT(data_n, data_e, angle)
         # load pitsa files
         f = gzip.open(os.path.join(self.path,
                                    'rjob_20051006_r_%sdeg.gz' %
                                    angle))
         data_pitsa_r = np.loadtxt(f)
         f.close()
         f = gzip.open(os.path.join(self.path,
                                    'rjob_20051006_t_%sdeg.gz' %
                                    angle))
         data_pitsa_t = np.loadtxt(f)
         f.close()
         # Assert.
         self.assertTrue(np.allclose(datcorr_r, data_pitsa_r, rtol=1E-3,
                                     atol=1E-5))
         self.assertTrue(np.allclose(datcorr_t, data_pitsa_t, rtol=1E-3,
                                     atol=1E-5))
Example #19
def write_format(file):
    record_parser = GenBank.RecordParser(debug_level = 2)

    print "Testing GenBank writing for %s..." % os.path.basename(file)
    # be able to handle gzipped files
    if '.gz' in file:
        cur_handle = gzip.open(file, "r")
        compare_handle = gzip.open(file, "r")
    else:
        cur_handle = open(file, "r")
        compare_handle = open(file, "r")

    iterator = GenBank.Iterator(cur_handle, record_parser)
    compare_iterator = GenBank.Iterator(compare_handle)
        
    while 1:
        cur_record = iterator.next()
        compare_record = compare_iterator.next()
            
        if cur_record is None or compare_record is None:
            break

        # print "\tTesting for %s" % cur_record.version

        output_record = str(cur_record) + "\n"
        try:
            do_comparison(compare_record, output_record)
        except AssertionError, msg:
            print "\tTesting for %s" % cur_record.version
            print msg
Example #20
    def test_seis_sim_vs_pitsa_2(self):
        """
        Test simulate_seismometer seismometer simulation against seismometer
        simulation of Pitsa - STS-2 seismometer.
        """
        # load test file
        file = os.path.join(self.path, 'rotz_20081028.gz')
        with gzip.open(file) as f:
            data = np.loadtxt(f)

        # paz of test file
        samp_rate = 200.0
        paz_sts2 = {'poles': [-0.03736 - 0.03617j,
                              -0.03736 + 0.03617j],
                    'zeros': [0.0 + 0.0j] * 2,
                    'sensitivity': 1.0,
                    'gain': 1.5}

        for id, paz in INSTRUMENTS.items():
            # simulate instrument
            datcorr = simulate_seismometer(
                data, samp_rate, paz_remove=paz_sts2, paz_simulate=paz,
                water_level=600.0, zero_mean=False, nfft_pow2=True)
            # load pitsa file
            filename = os.path.join(self.path, 'rotz_20081028_%s.gz' % id)
            with gzip.open(filename) as f:
                data_pitsa = np.loadtxt(f)
            # calculate normalized rms
            rms = np.sqrt(np.sum((datcorr - data_pitsa) ** 2) /
                          np.sum(data_pitsa ** 2))
            self.assertTrue(rms < 1e-04)
Example #21
    def test_seis_sim_vs_pitsa1(self):
        """
        Test simulate_seismometer seismometer simulation against seismometer
        simulation of Pitsa - LE3D seismometer.
        """
        # load test file
        filename = os.path.join(self.path, 'rjob_20051006.gz')
        with gzip.open(filename) as f:
            data = np.loadtxt(f)

        # paz of test file
        samp_rate = 200.0
        paz_le3d = {'poles': [-4.21 + 4.66j,
                              -4.21 - 4.66j,
                              -2.105 + 0.0j],
                    'zeros': [0.0 + 0.0j] * 3,
                    'sensitivity': 1.0,
                    'gain': 0.4}

        for id, paz in INSTRUMENTS.items():
            # simulate instrument
            datcorr = simulate_seismometer(
                data, samp_rate, paz_remove=paz_le3d, paz_simulate=paz,
                water_level=600.0, zero_mean=False, nfft_pow2=True)
            # load pitsa file
            filename = os.path.join(self.path, 'rjob_20051006_%s.gz' % id)
            with gzip.open(filename) as f:
                data_pitsa = np.loadtxt(f)
            # calculate normalized rms
            rms = np.sqrt(np.sum((datcorr - data_pitsa) ** 2) /
                          np.sum(data_pitsa ** 2))
            self.assertTrue(rms < 1.1e-05)
Example #22
    def RebuildCompoundJSON():
        
        kegg_dict = {}
        for d in json.load(gzip.open(OLD_COMPOUND_JSON_FNAME, 'r')):
            cid = d['CID']
            kegg_dict[cid] = {'compound_id': cid,
                              'name': d['name'],
                              'names': d['names'],
                              'inchi': d['InChI']}
        
        # override some of the compounds or add new ones with 'fake' IDs,
        # i.e. C80000 or higher.
        for d in csv.DictReader(open(KEGG_ADDITIONS_TSV_FNAME, 'r'),
                                delimiter='\t'):
            cid = 'C%05d' % int(d['cid'])
            kegg_dict[cid] = {'compound_id': cid,
                              'name': d['name'],
                              'names': [d['name']],
                              'inchi': d['inchi']}
        
        compound_json = [kegg_dict[compound_id] for compound_id in sorted(kegg_dict.keys())]

        new_json = gzip.open(KEGG_COMPOUND_JSON_FNAME, 'w')
        json.dump(compound_json, new_json, sort_keys=True, indent=4)
        new_json.close()
Example #23
def main(args):
    mmatch = args.mismatches
    minleng = args.minlength
    r1_primers = fasta_to_dict(args.r1primer)
    r2_primers = fasta_to_dict(args.r2primer)
    r1_out = gzip.open(get_name(args.r1, "trimmed"), 'wb')
    r2_out = gzip.open(get_name(args.r2, "trimmed"), 'wb')
    for i, (r1, r2) in enumerate(izip(readfq(args.r1), readfq(args.r2)), start=1):
        if i % 100000 == 0: print >>sys.stderr, ">> processed %d reads" % i
        assert r1.name.split()[0] == r2.name.split()[0]

        # determine primer being used, trim location
        p1, r1_left_trim = get_primer(r1.seq, r1_primers, mmatch)
        p2, r2_left_trim = get_primer(r2.seq, r2_primers, mmatch)
        if not p1 or not p2: continue

        # find start of RC of primer in opposing sequence
        r1_right_trim = trim_loc(r1.seq[r1_left_trim:], rev_comp(r2_primers[p2]))
        r2_right_trim = trim_loc(r2.seq[r2_left_trim:], rev_comp(r1_primers[p1]))
        
        r1.name = "{id}:{cregion}:{fwork} 1".format(id=r1.name.split()[0], cregion=p1, fwork=p2)
        r2.name = "{id}:{cregion}:{fwork} 2".format(id=r2.name.split()[0], cregion=p1, fwork=p2)
        
        # do the trimming of seq and qual
        r1_full_trim = r1_right_trim + r1_left_trim
        r1.seq = r1.seq[r1_left_trim:r1_full_trim]
        r1.qual = r1.qual[r1_left_trim:r1_full_trim]
        r2_full_trim = r2_right_trim + r2_left_trim
        r2.seq = r2.seq[r2_left_trim:r2_full_trim]
        r2.qual = r2.qual[r2_left_trim:r2_full_trim]
        if len(r1.seq) < minleng or len(r2.seq) < minleng: continue

        # write the records
        r1_out.write(r1.__str__() + "\n")
        r2_out.write(r2.__str__() + "\n")
Example #24
 def test_processFeatDicts(self):
     """Test that the basic processing of feature dicts works as expected"""
     ifs = gzip.open(self.FEAT_FILENAME)
     reader = FeatureDictReader(ifs)
     instance = NormalizeFeatDicts();
     instance.loadFeatKeyToColMap(self.MAP_FILENAME);
     
     sMinMaxDict = instance.processFeatureDictList(reader);
     self.assertEqual(sMinMaxDict['testOther'], self.EXP_TEST_OTHER_MINRANGE);
     
     self.assert_(len(sMinMaxDict.keys()) <= len(self.featureList))
     #log.info('The MinMaxDict is %s' % pformat(sMinMaxDict));
     ifs.close()
     
     ifs = gzip.open(self.FEAT_FILENAME)
     reader = FeatureDictReader(ifs)
     newFDictList = [];
     theIdList = [];
     for idx, fdict in instance.normalizeFeatDictList(reader, sMinMaxDict, self.mapObj):
         newFDictList.append(fdict)
         theIdList.append(idx)
     ifs.close();
     
     self.assertEqual(len(newFDictList), len(self.trdataList))
     self.assertEqual(len(theIdList), len(self.trdataList));
Example #25
def export_compounds(priority, name, ionic_strength, pMg, pH_list):
    pseudoisomer_fname = DOWNLOADS_PSEUDOISOMER_PREFIX + '_%s.csv.gz' % name
    csv_pseudoisomers = csv.writer(gzip.open(pseudoisomer_fname, 'w'))
    csv_pseudoisomers.writerow(["!MiriamID::urn:miriam:kegg.compound",
                                "!Name", "!dG0 (kJ/mol)",
                                "!nH", "!charge", "!nMg", "!Note"])

    csv_compound_dict = {}
    for pH in pH_list:
        compound_fname = DOWNLOADS_COMPOUND_PREFIX + '_%s_ph%.1f.csv.gz' % (name, pH)
        csv_compound_dict[pH] = csv.writer(gzip.open(compound_fname, 'w'))
        csv_compound_dict[pH].writerow(["!MiriamID::urn:miriam:kegg.compound",
                                        "!Name", "!dG0_prime (kJ/mol)",
                                        "!pH", "!I (mM)", "!T (Kelvin)",
                                        "!Note"])
    
    logging.info("Writing chemical and biochemical formation energies for %s to: %s" %
                 (name, pseudoisomer_fname))
    for compound in models.Compound.objects.all():
        phase = compound.GetDefaultPhaseName()
        rows = compound.ToCSVdG0(priority, phase=phase)        
        csv_pseudoisomers.writerows(rows)
        for pH in pH_list:
            aq_params = conditions.AqueousParams(pH=pH, pMg=pMg,
                                                 ionic_strength=ionic_strength)
            rows = compound.ToCSVdG0Prime(priority, aq_params=aq_params, 
                                          phase=phase)
            csv_compound_dict[pH].writerows(rows)
Example #26
def main(options):

    freq_range=range(options["from"], options["to"]+1)
    
    gt_file=gzip.open(options["gt_file"], "r")
    pos_file=gzip.open(options["pos_file"], "r")
    out_haps=gzip.open(options["out_root"]+"/haps.gz", "w")
    out_haps_fn=[gzip.open(options["out_root"]+"/haps.f"+str(x)+".gz", "w") for x in freq_range]

    out_samples=open(options["out_root"]+"/samples.txt", "w")

    gt=np.genfromtxt(gt_file, delimiter=1)
    pos=np.genfromtxt(pos_file)
    pos=np.floor(pos*options["chr_len"]).astype(int)
    
    gt=gt.transpose().astype(int)
    # This is because on some platforms the np.genfromtxt tries to import the line endings...     
    gt=gt[range(len(pos)),]               
    
    (nsnp,nind)=gt.shape

    ACs=np.sum(gt, axis=1)
    MACs=np.minimum(ACs, nind-ACs)
    for i in range(nsnp):
        out_haps.write(("\t".join(["%d"]*(nind+1))+"\n")%((pos[i],)+tuple(gt[i,])))
        if MACs[i]>=options["from"] and MACs[i]<= options["to"]:
            idx=MACs[i]-options["from"]
            out_haps_fn[idx].write(("\t".join(["%d"]*(nind+1))+"\n")%((pos[i],)+tuple(gt[i,])))

    for i in range(int(nind/2)):
        out_samples.write("SIM%d\n"%(i+1,))
            
    for fil in [gt_file, pos_file, out_haps]+out_haps_fn:
        fil.close()
Example #27
	def __init__(self):
		self.ls=[]
		self.outputBuffer=''   #output buffer
		self.of = gzip.open("final_indx",'wb') #final output remove gzip. for debug mode
		self.lexf = gzip.open("final_lex",'wb') #final lexicon structure remove gzip. for debug mode
		self.r=11                               #total number of files
		self.sz=int(math.floor(524288000*2/100/(self.r+1)))
Example #28
def nwaymerge(s,e,memory,file_prefix,final_file):
	merge = Merge()
	f=[] #File pointers list
	fr=[] #Input read buffers
	merge.of = gzip.open(final_file+'index','wb') #final output remove gzip. for debug mode
	merge.lexf = gzip.open(final_file+'lex','wb') #final lexicon structure remove gzip. for debug mode
	heap_items=[]
	merge.sz=int(math.floor(memory/(e-s+2)))
	for i in range(s,e+1):
		f.append(gzip.open(file_prefix+str(i),'rb'))
	print("files opened")
	for i in range(e-s+1):
		fr.append(merge.readInput(f[i],merge.sz))
		merge.constructHeap(fr[i][0].split('\n'),heap_items,i) #put first set in heap appends a marker to last element
	
	while len(heap_items)>0:
		tok,doc_id, hi =heapq.heappop(heap_items) #pop tokens
		if tok != '':
			merge.write_final(tok,hi.invitem)
		if hi.is_last== True:      #if last element of file read the next set of data
			if fr[hi.fl][1]:                #checks if file is not empty 
				fr[hi.fl]=merge.readInput(f[hi.fl],merge.sz)
				merge.constructHeap(fr[hi.fl][0].split('\n'),heap_items,hi.fl)
	
	merge.writeLex()   #Writes lexicon details to file
Example #29
 def get_highlighted(self, filename, hl_lines=None):
     """Get the highlighted version of a file."""
     hl_lines = sorted(hl_lines or [])
     st = os.stat(filename)
     key = '%s-%d-%s-%s' % (filename, int(st.st_mtime),
                            CACHE_SERIAL, hl_lines)
     key = os.path.join(self.cache_dir,
                        hashlib.sha1(key).hexdigest() + '.html.gz')
     try:
         with gzip.open(key) as keyfile:
             return keyfile.read()
     except IOError:
         with open(filename) as infile:
             file_data = infile.read()
         try:
             lexer = lexers.guess_lexer_for_filename(filename, file_data)
         except pygments.util.ClassNotFound:
             try:
                 lexer = lexers.guess_lexer(file_data)
             except pygments.util.ClassNotFound:
                 lexer = lexers.TextLexer()
         highlight = pygments.highlight(
             file_data, lexer, formatters.HtmlFormatter(
                 hl_lines=hl_lines, linenos='table', lineanchors='line',
                 anchorlinenos=True))
         with gzip.open(key, 'w') as keyfile:
             keyfile.write(highlight.encode('utf-8'))
         return highlight
Example #30
def dat2hdf5(table_dir):
    """
    Convert the Marshall et al. (2006) map from \*.dat.gz to \*.hdf5.
    """

    import astropy.io.ascii as ascii
    import gzip
    from contextlib import closing

    readme_fname = os.path.join(table_dir, 'ReadMe')
    table_fname = os.path.join(table_dir, 'table1.dat.gz')
    h5_fname = os.path.join(table_dir, 'marshall.h5')

    # Extract the gzipped table
    with gzip.open(table_fname, 'rb') as f:
        # Read in the table using astropy's CDS table reader
        r = ascii.get_reader(ascii.Cds, readme=readme_fname)
        r.data.table_name = 'table1.dat'  # Hack to deal with bug in CDS reader.
        table = r.read(f)
        print(table)

    # Reorder table entries according to Galactic (l, b)
    l = coordinates.Longitude(table['GLON'][:], wrap_angle=180. * units.deg)
    b = table['GLAT'][:]

    sort_idx = np.lexsort((b, l))

    l = l[sort_idx].astype('f4')
    b = b[sort_idx].astype('f4')
    l.shape = (801, 81)
    b.shape = (801, 81)

    # Extract arrays from the table
    chi2_all = np.reshape((table['x2all'][sort_idx]).astype('f4'), (801, 81))
    chi2_giants = np.reshape((table['x2gts'][sort_idx]).astype('f4'),
                             (801, 81))

    A = np.empty((801 * 81, 33), dtype='f4')
    sigma_A = np.empty((801 * 81, 33), dtype='f4')
    dist = np.empty((801 * 81, 33), dtype='f4')
    sigma_dist = np.empty((801 * 81, 33), dtype='f4')

    for k in range(33):
        A[:, k] = table['ext{:d}'.format(k + 1)][sort_idx]
        sigma_A[:, k] = table['e_ext{:d}'.format(k + 1)][sort_idx]
        dist[:, k] = table['r{:d}'.format(k + 1)][sort_idx]
        sigma_dist[:, k] = table['e_r{:d}'.format(k + 1)][sort_idx]

    A.shape = (801, 81, 33)
    sigma_A.shape = (801, 81, 33)
    dist.shape = (801, 81, 33)
    sigma_dist.shape = (801, 81, 33)

    # Construct the HDF5 file
    h5_fname = os.path.join(table_dir, 'marshall.h5')
    filter_kwargs = dict(
        chunks=True,
        compression='gzip',
        compression_opts=3,
        # scaleoffset=4
    )

    with h5py.File(h5_fname, 'w') as f:
        dset = f.create_dataset('A', data=A, **filter_kwargs)
        dset.attrs['description'] = 'Extinction of each bin'
        dset.attrs['band'] = 'Ks (2MASS)'
        dset.attrs['units'] = 'mag'

        dset = f.create_dataset('sigma_A', data=sigma_A, **filter_kwargs)
        dset.attrs['description'] = 'Extinction uncertainty of each bin'
        dset.attrs['band'] = 'Ks (2MASS)'
        dset.attrs['units'] = 'mag'

        dset = f.create_dataset('dist', data=dist, **filter_kwargs)
        dset.attrs['description'] = 'Distance of each bin'
        dset.attrs['units'] = 'kpc'

        dset = f.create_dataset('sigma_dist', data=sigma_dist, **filter_kwargs)
        dset.attrs['description'] = 'Distance uncertainty of each bin'
        dset.attrs['units'] = 'kpc'

        dset = f.create_dataset('chi2_all', data=chi2_all, **filter_kwargs)
        dset.attrs['description'] = 'Chi^2, based on all the stars'
        dset.attrs['units'] = 'unitless'

        dset = f.create_dataset('chi2_giants',
                                data=chi2_giants,
                                **filter_kwargs)
        dset.attrs['description'] = 'Chi^2, based on giants only'
        dset.attrs['units'] = 'unitless'

        # filter_kwargs.pop('scaleoffset')

        dset = f.create_dataset('l', data=l, **filter_kwargs)
        dset.attrs['description'] = 'Galactic longitude'
        dset.attrs['units'] = 'deg'

        dset = f.create_dataset('b', data=b, **filter_kwargs)
        dset.attrs['description'] = 'Galactic latitude'
        dset.attrs['units'] = 'deg'
Example #31
import numpy as np
import gzip  # decompressor for reading the test data
import pickle  # reads the test data into an array
import os.path

with gzip.open('mnist.pkl.gz', 'rb') as f:  # opens the training data set
    train_set, valid_set, test_set = pickle.load(f, encoding='iso-8859-1')
    # train_set[0] is a 50,000 x 784 matrix --> pixel data
    # train_set[1] is a 50,000 x 1 matrix --> true values
    # encoding='iso-8859-1' is needed because Python 3 uses a different
    # default encoding and the training data set was written in Python 2

train_x = train_set[0]
train_y = train_set[1]  # digits in computer representation

test_x = test_set[0]
test_y = test_set[1]

# convert the digits into vectors of the appropriate shape
train_y_dec = np.zeros([len(train_y), 10])
for i in range(len(train_y)):
    train_y_dec[i][train_y[i]] = 1

test_y_dec = np.zeros([len(test_y), 10])
for i in range(len(test_y)):
    test_y_dec[i][test_y[i]] = 1


# sigmoid function
def sig(x):
    return 1 / (1 + np.exp(-x))
Example #32
def _load_data(filename):
    with gzip.open(filename, 'rb') as f:
        data = pickle.load(f)
    return data
Example #33
def TemporaryGzipInflation(gzfile):
	t = TemporaryFile(mode='wb')
	import gzip
	with gzip.open(gzfile, 'rb') as previewfile:
		t.write(previewfile.read())
	return t.name
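On POSIX systems a TemporaryFile may have no reopenable name and disappears once the handle is collected, so returning t.name as above can hand back a path that no longer works. A sketch using NamedTemporaryFile(delete=False) instead (the function name mirrors the example but is illustrative):

import gzip
import shutil
from tempfile import NamedTemporaryFile

def temporary_gzip_inflation(gzfile):
    # delete=False keeps the file on disk after the handle closes,
    # so the returned path stays valid for the caller
    with NamedTemporaryFile(mode='wb', delete=False) as t, \
         gzip.open(gzfile, 'rb') as previewfile:
        shutil.copyfileobj(previewfile, t)
        return t.name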
Example #34
#!/usr/bin/env python2.7

from Bio import SeqIO
import sys, os, gzip, glob

lnfinpat = sys.argv[1:-1]
dirout = sys.argv[-1]

for nfinpat in lnfinpat:
	for nfin in glob.glob(nfinpat):
		if nfin.endswith('.gz'):
			fin = gzip.open(nfin, 'rb')
		else:
			fin = open(nfin, 'r')
		seqrecit = SeqIO.parse(fin, format='genbank')
		for seqrec in seqrecit:
			seqid = seqrec.id
			nfout = seqid+'.gbk'
			with open(os.path.join(dirout, nfout), 'w') as fout:
				SeqIO.write([seqrec], fout, format='genbank')
		fin.close()
Example #35
def SignApk(data, keyname, pw, platform_api_level, codename_to_api_level_map,
            is_compressed):
    unsigned = tempfile.NamedTemporaryFile()
    unsigned.write(data)
    unsigned.flush()

    if is_compressed:
        uncompressed = tempfile.NamedTemporaryFile()
        with gzip.open(unsigned.name, "rb") as in_file, \
             open(uncompressed.name, "wb") as out_file:
            shutil.copyfileobj(in_file, out_file)

        # Finally, close the "unsigned" file (which is gzip compressed), and then
        # replace it with the uncompressed version.
        #
        # TODO(narayan): All this nastiness can be avoided if python 3.2 is in use,
        # we could just gzip / gunzip in-memory buffers instead.
        unsigned.close()
        unsigned = uncompressed

    signed = tempfile.NamedTemporaryFile()

    # For pre-N builds, don't upgrade to SHA-256 JAR signatures based on the APK's
    # minSdkVersion to avoid increasing incremental OTA update sizes. If an APK
    # didn't change, we don't want its signature to change due to the switch
    # from SHA-1 to SHA-256.
    # By default, APK signer chooses SHA-256 signatures if the APK's minSdkVersion
    # is 18 or higher. For pre-N builds we disable this mechanism by pretending
    # that the APK's minSdkVersion is 1.
    # For N+ builds, we let APK signer rely on the APK's minSdkVersion to
    # determine whether to use SHA-256.
    min_api_level = None
    if platform_api_level > 23:
        # Let APK signer choose whether to use SHA-1 or SHA-256, based on the APK's
        # minSdkVersion attribute
        min_api_level = None
    else:
        # Force APK signer to use SHA-1
        min_api_level = 1

    common.SignFile(unsigned.name,
                    signed.name,
                    keyname,
                    pw,
                    min_api_level=min_api_level,
                    codename_to_api_level_map=codename_to_api_level_map)

    data = None
    if is_compressed:
        # Recompress the file after it has been signed.
        compressed = tempfile.NamedTemporaryFile()
        with open(signed.name, "rb") as in_file, \
             gzip.open(compressed.name, "wb") as out_file:
            shutil.copyfileobj(in_file, out_file)

        data = compressed.read()
        compressed.close()
    else:
        data = signed.read()

    unsigned.close()
    signed.close()

    return data
Example #36
def load_DicFromPickleFile(pickleFilePath):
	pf = gzip.open(pickleFilePath,'rb')
	retVal = cPickle.load(pf)
	pf.close()
	return retVal
Example #37
def create_PickleFromDict(pickleFilePath, dic):
	pf = gzip.open(pickleFilePath,'wb')
	cPickle.dump(dic,pf)
	pf.close()
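Examples #36 and #37 are Python 2 style (cPickle, explicit close). An equivalent Python 3 sketch of the same gzip-pickle round trip, assuming nothing beyond the standard library:

import gzip
import pickle

def load_dict_from_pickle_file(pickle_file_path):
    with gzip.open(pickle_file_path, 'rb') as pf:
        return pickle.load(pf)

def create_pickle_from_dict(pickle_file_path, dic):
    with gzip.open(pickle_file_path, 'wb') as pf:
        pickle.dump(dic, pf)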
Example #38
 def compress(file_name):
     '''Since static methods access neither the class nor the instance,
     Python does not pass them an implicit first parameter'''
     with open(file_name, 'rb') as content:
         with gzip.open(file_name + '.gz', 'wb') as gzip_file:
             gzip_file.writelines(content)
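A matching decompress helper is not part of the example above, but the inverse direction follows the same shape. A sketch that assumes the archive name ends in '.gz':

import gzip
import shutil

def decompress(file_name):
    # writes the inflated content next to the archive, dropping the '.gz' suffix
    with gzip.open(file_name, 'rb') as gzip_file:
        with open(file_name[:-len('.gz')], 'wb') as content:
            shutil.copyfileobj(gzip_file, content)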
Example #39
def fopen(filename, mode='r'):
    if filename.endswith('.gz'):
        return gzip.open(filename, mode)
    return open(filename, mode)
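A short usage sketch of this helper (file names are illustrative). Note that gzip.open defaults to binary mode, so 'rt'/'wt' should be passed explicitly when text is expected:

for name in ('notes.txt', 'notes.txt.gz'):
    with fopen(name, 'rt') as handle:   # fopen as defined above
        for line in handle:
            print(line.rstrip())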
Example #40
def parse(path):
    g = gzip.open(path, 'rb')
    for l in g:
        yield eval(l)
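If each line in the archive is valid JSON, json.loads is a safer substitute for eval, since it does not execute arbitrary expressions. A hedged alternative sketch (function name is illustrative):

import gzip
import json

def parse_json(path):
    # assumes one JSON object per line
    with gzip.open(path, 'rt') as g:
        for line in g:
            yield json.loads(line)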
Example #41
    GQThreshold = args.min_gq

    logger.info('Max parental variant fraction: %f', MAX_PARENTAL_VAR_FRAC)
    logger.info('Min variant fraction in child: %f', MIN_VAR_FRAC_IN_CHILD)
    logger.info('Min reads in child: %d', MIN_READS_IN_CHILD)
    logger.info('logBayesFactorThreshold: %f', logBayesFactorThreshold)
    logger.info('GQ threshold: %d', GQThreshold)

    # Input VCF file of Platypus (or other) calls. Can be gzipped or
    # plain text.
    inVCFName = args.vcf
    inVCFFile = None

    logger.info('Using VCF: %s', inVCFName)
    if inVCFName.endswith("gz"):
        inVCFFile = gzip.open(inVCFName, 'r')
    else:
        inVCFFile = open(inVCFName, 'r')

    pedFileName = args.ped # Name of pedigree file
    logger.info('Using PED: %s', pedFileName)

    extension = args.extension

    # Make output files using the name of the input VCF with various extensions
    outMendelErrorsFileName = inVCFName.split(".")[0] + "_mendelErrors%s.vcf"  %(extension)
    outDeNovoVarsFileName = inVCFName.split(".")[0] + "_deNovoVariants%s.vcf" %(extension)
    outFilteredDeNovoVarsFileName = inVCFName.split(".")[0] + "_deNovoVariantsPassingBayesianFilter%s.vcf" %(extension)

    outMendelErrorFile = open(outMendelErrorsFileName, 'w')                 # Output file to contain list of mendelian inconsistency calls
    outDeNovoVarsFile = open(outDeNovoVarsFileName, 'w')                    # Output file to contain list of de novo variants
Example #42
        ax.set_xticks([])
        ax.set_yticks([])

    plt.show()


show_all_digit_components(X, 0)

#%%
"""MNIST Dataset"""

import gzip, pickle

DATA_PATH = 'data/mnist.pkl.gz'

with gzip.open(DATA_PATH, 'rb') as f:
    (X, y), _, _ = pickle.load(f, encoding='latin1')

# As a sanity check, we print out the size of the data.
print('Training data shape:    ', X.shape)
print('Training labels shape:  ', y.shape)

plt.figure(figsize=(12, 6))

pca = PCA().fit(X)  # Notice

plt.bar(range(200),
        pca.explained_variance_ratio_[:200],
        alpha=0.8,
        align='center')
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# Runs the op.
print(sess.run(c))
tf.set_random_seed(123)
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

#hello = tf.constant('Hello, TensorFlow!')
#sess = tf.Session()
#print(sess.run(hello))
import gzip
f = gzip.open('mnist.pkl.gz', 'rb')
if sys.version_info < (3,):
    data = pickle.load(f)
else:
    data = pickle.load(f, encoding='bytes')
f.close()

(feat_train,l_train),(feat_test,l_test) = data
feat_train = feat_train.astype('float32')
feat_test = feat_test.astype('float32')
feat_train/=255
feat_test/=255
pca_train = feat_train[:,:,:]
pca_train=pca_train.reshape(pca_train.shape[0],28,28,1)
print(pca_train.shape)
npad = ((0, 0), (2, 2), (2, 2))
Example #44
        cdata['series'][sname].append(None)
      else:
        iseries.sort()
        if len(iseries) % 2 == 1:
          median = iseries[(len(iseries) - 1) / 2]
        else:
          median = iseries[len(iseries) / 2]
        if iseries[0] == median:
          cdata['series'][sname].append(median)
        else:
          cdata['series'][sname].append([iseries[0], median, iseries[-1]])
  return cdata

for fname in files:
  print("Condensing %s" % (fname,))
  f = gzip.open(os.path.join(outdir, fname), 'r')
  fdata = json.loads(f.read())
  f.close()
  if not len(fdata['builds']): continue
  totaldata['allseries'].append({
      'fromtime' : fdata['builds'][0]['time'],
      'totime' : fdata['builds'][-1]['time'],
      'dataname' : fname.replace('.json.gz', '')
    })
  cdata = condense_data(fdata)
  for x in cdata['series'].keys():
    totaldata['series'].setdefault(x, [])
    # If this series just appeared, or was absent from some datafiles before this,
    # make sure we pad out with nulls to keep the indexes lined up
    totaldata['series'][x].extend([None for y in range(len(totaldata['builds']) - len(totaldata['series'][x]))])
    totaldata['series'][x].extend(cdata['series'][x])
Example #45
def main():
	#
	admGlobMod = 'Ukraine*' if '-demo' in sys.argv else '';
	#

	try: con = psycopg2.connect("dbname='postgres' user='' host='' password=''")
	except: con = psycopg2.connect("dbname='' user='' host='localhost' password=''")

	cur = con.cursor(); con.autocommit = True

	initTable = 'DROP TABLE IF EXISTS new_masked_ndvi;' if '-drop' in sys.argv else ''; print initTable

	cur.execute('%sCREATE TABLE IF NOT EXISTS new_masked_ndvi (region_id text, country text, state text, district text, start_date date, ndvi real, ndvi_count integer, anomaly real, anomaly_count integer, centr_lon real, centr_lat real, PRIMARY KEY (centr_lon, centr_lat, start_date));'%(initTable))

	path = '/database/gimms.gsfc.nasa.gov/';	tmp = '/dev/shm/'; files = defaultdict(str)
	admShp2 = glob2.glob('/database/gimms.gsfc.nasa.gov/shapefiles/admin2_*%s.shp'%(admGlobMod))

	if '-update' in sys.argv:
		# Select the oldest of all countries' last-updates
		cur.execute('SELECT DISTINCT country, MAX(start_date) FROM new_masked_ndvi GROUP BY country ORDER BY MAX(start_date) LIMIT 1')
		updateStart = cur.fetchone()[1] + datetime.timedelta(days=8)
		updateStartDay = updateStart.timetuple().tm_yday
		baseURL = "wget -r -nv -N -c -t 3 -R '*5v3*' -R '*DOY.tif*' ftp://gimms.gsfc.nasa.gov/MODIS/std/GMYD09Q1/tif/NDVI"
		os.chdir('/database/')

		for yrStart in xrange(updateStart.year, datetime.date.today().year + 1):
			dayOfYrStart = updateStartDay if yrStart == updateStart.year else 1
			for dayOfYr in xrange(dayOfYrStart, 365, 8):
				#This 'for' loop downloads both the NDVI and Anomaly files per year/day-of-year pair.
				for pathSuffix in ['/%s/%s*'%(yrStart,dayOfYr),'_anom_S2003-2015/%s/%s*'%(yrStart,dayOfYr)]:
					os.system(baseURL + pathSuffix)

	for fp in glob2.glob(path + '*/**/*.tif.gz'):	files[os.path.split(fp)[1]] = os.path.split(fp)[0]


	def freeSpace():
		freeSpace = os.statvfs('/dev/shm')		# Clear tmp space
		if (freeSpace[2] - freeSpace[3]) * 1.125 > freeSpace[2]:
			delFiles={}
			for fl in glob2.glob('/dev/shm/*.tif'):	delFiles[str(os.path.getctime(fl))] = fl

			delFileKeys = sorted(delFiles.keys())

			print '\n\t*\tFree space: %s, Used space: %s\t*\n'%(freeSpace[3], freeSpace[2] - freeSpace[3])
			print '\t\t\tDeleting %s files, starting with %s\n'%(int(len(delFileKeys) / 1.125), delFiles[delFileKeys[0]])

			for dF in xrange(int(len(delFileKeys) / 1.125)): os.remove(delFiles[delFileKeys[dF]])


	def getSt(shp,f,xy):
		shp=shp; f=f; xy=xy; x=int(xy[1:3]); y=int(xy[4:6])
		# Just get the data (raster_out) - calculating stats further down
		with rasterio.open(f) as src:
			affine = src.affine
			dat = src.read(1)
		xyMask = mask[y*4000:y*4000+4000,x*4000:x*4000+4000]

		statsMask = tuple(xmea for xmea in r.zonal_stats(shp, xyMask, stats='count', geojson_out=True, raster_out=True, affine=affine, nodata=None) if floor((180 + xmea['properties']['mini_raster_affine'][2]) / 9) == int(xy[1:3]) and floor((90 - xmea['properties']['mini_raster_affine'][5]) / 9) == int(xy[4:6]))

		stats = tuple(mea for mea in r.zonal_stats(shp, dat, stats='count', geojson_out=True, raster_out=True, affine=affine, nodata=None) if floor((180 + mea['properties']['mini_raster_affine'][2]) / 9) == int(xy[1:3]) and floor((90 - mea['properties']['mini_raster_affine'][5]) / 9) == int(xy[4:6]))
		return [stats, statsMask]

	def getSQLRecords():
		cur.execute('SELECT DISTINCT country FROM new_masked_ndvi')
		return cur.fetchall()


	if '-demo' not in sys.argv and '-update' not in sys.argv: # IF THIS IS NOT DEMO MODE OR UPDATE MODE...
		records = [recor[0].replace('\xc3\x83\xc2\x85','').lower() for recor in getSQLRecords()]
		# COUNTRY FILTER
		# for filter in ('antarctica','greenland'):	records.append(filter)
		# for filter in ('antarctica','greenland'):	records.append(filter)
	else:
		records = []


	# Open the mask file
	print 'Opening mask file...'
	mask = gdal.Open('/database/gimms.gsfc.nasa.gov/mask.tif')
	print 'Mask file opened'; sys.stdout.flush()
	mask = np.array(mask.GetRasterBand(1).ReadAsArray())
	print 'Mask read into array'; sys.stdout.flush()

	enALS = 2
	for adLevShp in [admShp2]: #This is really just a single-element array since we're only doing Admin Level 2 files.
		for shapeFile in adLevShp:
			lcShapeFile = shapeFile.decode('utf-8').lower()[0:-4] + str(enALS); continu = False

			for rec in records:	continu = True if rec in lcShapeFile else continu

			if continu is True:	continue # Skip this shapefile -=- its records are in the database

			countryDateAnom = defaultdict(dict); countryDateNDVI = defaultdict(dict)
			print '\nShapefile: %s'%(shapeFile); sys.stdout.flush()

			with fiona.open(shapeFile) as shp:
				# Country ident value comes from either the ISO or ISO2 code.
				name0 = 'NAME_ENGLI'; name1 = 'NAME_ENGLI'; name2 = 'NAME_ENGLI'; ident = 'ISO2'
				try:
					shp[0]['properties'][name0]
				except:
					name0 = 'NAME_0'; name1 = 'NAME_1'; name2 = 'NAME_1'; ident = 'HASC_1'
					try:
						shp[0]['properties']['NAME_2'];	name2 = 'NAME_2'; ident = 'HASC_2'
					except:
						pass

				xyList = [];
				for x in xrange(int(floor((180 + shp.bounds[0]) / 9)), int(floor((180 + shp.bounds[2]) / 9)) + 1):
					for y in xrange(int(floor((90 - shp.bounds[3]) / 9)), int(floor((90 - shp.bounds[1]) / 9)) + 1):
						if 1 in mask[y*4000:y*4000+4000,x*4000:x*4000+4000]:
							xyList.append('x%02dy%02d'%(x,y))
						else:
							print 'x%02dy%02d removed due to mask'%(x,y)

				if '-update' in sys.argv:
					yrStart = updateStart.year # Reset for use further down.
					dayOfYrStart = updateStartDay
				else:
					dayOfYrStart = 1
					yrStart = 2002

				# for doy in xrange(209,265,8):
				for doy in xrange(dayOfYrStart,365,8):
					for xy in xyList:
						print '\nXY:%s'%(xy); sys.stdout.flush()

						fdoy = '%03d'%(doy)
						print '\n %s :: '%(fdoy),; sys.stdout.flush()
						for yr in xrange(yrStart,datetime.datetime.now().year + 1):

							anFl = 'GMYD09Q1.A%s%s.08d.latlon.%s.6v1.NDVI_anom_S2003-2015.tif.gz'%(yr,fdoy,xy)
							ndFl = 'GMYD09Q1.A%s%s.08d.latlon.%s.6v1.NDVI.tif.gz'%(yr,fdoy,xy)
							if anFl in files.keys() and ndFl in files.keys():
								aF = tmp + anFl[0:-3]	# Anomaly .tif file
								if not os.path.isfile(aF):
									try:
										with gzip.open(os.path.join(files[anFl],anFl),'rb') as aZ:
											with open(aF,'w+b') as aT: aT.write(aZ.read())
									except Exception as gzAf:
										print 'Could not extract', gzAf
										continue

								try:
									aStatsX = getSt(shp,aF,xy)
									aStats = aStatsX[0]
									aMaskStats = aStatsX[1]
									if aStats == ():

										print '! No Data, Skipping this year for this xy for this doy !'
										continue
								except Exception as aStErr:
									print '\tCould not get stats', aStErr
									continue

								print '%s'%(str(yr)[-2:]),; sys.stdout.flush()

								for aStI,aSt in enumerate(aStats): #For each Admin Level 2 area in the current country (shp file):
									lonLatCentroid = (aSt['properties']['mini_raster_affine'][2], aSt['properties']['mini_raster_affine'][5])
									
									try: aSt['properties'][name0]
									except: continue

									names = '%s:%s:%s'%(aSt['properties'][name0], aSt['properties'][name1], aSt['properties'][name2])
									namesDay = '%s:%s'%(names,fdoy)
									namesDayYear = '%s:%s'%(namesDay,yr)
									regionID = aSt['properties'][ident]
									if regionID is None or regionID == u'':	regionID = aSt['properties']['ISO']

									aMask = [~np.bool_(aMaskStats[aStI]['properties']['mini_raster_array']).flatten()]
									aData = ma.masked_array(np.float32(aSt['properties']['mini_raster_array']).flatten(), aMask)

									anom = (ma.masked_outside(aData,0,250).compressed() - 125) * .008
									try:
										countryDateAnom[namesDayYear]['level'] = enALS
										countryDateAnom[namesDayYear]['rId'] = regionID
										countryDateAnom[namesDayYear]['lonlatcentroid'] = lonLatCentroid
										countryDateAnom[namesDayYear]['anom'] = (countryDateAnom[namesDayYear]['anom'] + np.mean(anom)) / 2 if 'anom' in countryDateAnom[namesDayYear].keys() else np.mean(anom)
										countryDateAnom[namesDayYear]['anomcount'] = countryDateAnom[namesDayYear]['anomcount'] + len(anom) if 'anomcount' in countryDateAnom[namesDayYear].keys() else len(anom)
									except:
										continue
Example #46
faker = Faker()

# timestr = datetime.datetime.strftime(datetime.datetime.now() - datetime.timedelta(30), "%Y%m%d-%H%M%S")


timestr = time.strftime("%Y%m%d-%H%M%S")
otime = datetime.datetime.now()

outFileName = 'orders_log_' + timestr + '.log' if not file_prefix else file_prefix + '_access_log_' + timestr + '.log'

for case in switch(output_type):
    if case('LOG'):
        f = open(outFileName, 'w')
        break
    if case('GZ'):
        f = gzip.open(outFileName + '.gz', 'w')
        break
    if case('CONSOLE'): pass
    if case():
        f = sys.stdout

response = ["200", "404", "500", "301"]

verb = ["GET", "POST"]

# resources = ["/user/register", "/user/login", "/wp-content", "/wp-admin", "/explore", "/search/tag/list",
#              "/app/main/posts",
#              "/posts/posts/explore", "/apps/cart.jsp?appID="]
resources = ["/app/cart " , "/app/orders "]

ualist = [faker.firefox, faker.chrome, faker.safari, faker.internet_explorer, faker.opera]
Example #47
    def iterator(self):
        self.ver = None               # File version
        self.obj = None               # Problem structure
        self.mapnum = 0
        self.mapstacknum = 0
        self.mapstackdim = list()
        self.mapstackdomain = list()
        self.varnum = 0
        self.varstacknum = 0
        self.varstackdim = list()
        self.varstackdomain = list()
        self.intvarnum = 0
        self.intvar = list()
        self.psdmapnum = 0
        self.psdmapdim = list()
        self.psdvarnum = 0
        self.psdvardim = list()
        self.objfnnz = 0              # Objective coefficients
        self.objfsubj = list()
        self.objfsubk = list()
        self.objfsubl = list()
        self.objfval = list()
        self.objannz = 0
        self.objasubj = list()
        self.objaval = list()
        self.objbval = 0
        self.fnnz = 0                 # Scalar map coefficients
        self.fsubi = list()
        self.fsubj = list()
        self.fsubk = list()
        self.fsubl = list()
        self.fval = list()
        self.annz = 0
        self.asubi = list()
        self.asubj = list()
        self.aval = list()
        self.bnnz = 0
        self.bsubi = list()
        self.bval = list()
        self.hnnz = 0                 # PSD map coefficients
        self.hsubi = list()
        self.hsubj = list()
        self.hsubk = list()
        self.hsubl = list()
        self.hval = list()
        self.dnnz = 0
        self.dsubi = list()
        self.dsubk = list()
        self.dsubl = list()
        self.dval = list()

        self.change = False

        self.simplebounds = False
        simplemapvaridx = list()
        simplemapsign = list()
        simplemapconst = list()

        keyset = 0
        keyquery = self.fullkeyquery.copy()
        (linenum, line) = (-1, "")

        [self.name, filetype] = os.path.splitext(os.path.basename(self.file))
        if filetype.lower() == '.gz':
            self.name = os.path.splitext(self.name)[0]
            ff = gzip.open(self.file, 'rt')
        else:
            ff = open(self.file, 'rt')

        f = enumerate(ff)
        try:
            for (linenum, line) in f:
                line = self.__prepare_line(line)

                # Ignore comments between blocks
                if line.startswith('#'):
                    continue

                # Ignore empty lines between blocks
                if not line:
                    continue

                # Stop when requested information has been gathered
                if len(keyquery) == 0:
                    break

                #
                # Keyword set: File description keywords
                #
                if keyset == 0:
                    if line == "VER":
                        (linenum, line) = next(f)
                        self.ver = int(self.__prepare_line(line))
                        keyquery.discard("VER")
                        keyquery.discard("VER:HEAD")
                        continue

                    # Unrecognized line. Going to next set of keywords.
                    if line in self.keywords:
                        keyset = self.__inc_keyset(keyset)
                        keyquery -= self.keywordqueryset[keyset-1]
                    else:
                        raise Exception('Keyword not recognized')

                #
                # Keyword set: Structural keywords (note the default values)
                #
                if keyset == 1:
                    if line == "OBJSENSE":
                        if self.obj is not None:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        self.obj = self.__prepare_line(line)
                        keyquery.discard("OBJSENSE")
                        keyquery.discard("OBJSENSE:HEAD")
                        continue

                    if line == "PSDVAR":
                        if self.psdvarnum > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        self.psdvarnum = int(self.__prepare_line(line))

                        if "PSDVAR" in keyquery:
                            self.psdvardim = [0]*self.psdvarnum
                            for i in range(self.psdvarnum):
                                (linenum, line) = next(f)
                                self.psdvardim[i] = int(
                                    self.__prepare_line(line))

                        elif not keyquery <= set(["PSDVAR:HEAD"]):
                            for i in range(self.psdvarnum):
                                next(f)

                        keyquery.discard("PSDVAR")
                        keyquery.discard("PSDVAR:HEAD")
                        continue

                    if line == "VAR":
                        if self.varnum > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        buf = self.__prepare_line(line).split(' ')
                        self.varnum = int(buf[0])
                        self.varstacknum = int(buf[1])

                        if "VAR" in keyquery:
                            self.varstackdomain = ['']*self.varstacknum
                            self.varstackdim = [0]*self.varstacknum
                            for i in range(self.varstacknum):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.varstackdomain[i] = buf[0]
                                self.varstackdim[i] = int(buf[1])

                        elif not keyquery <= set(["VAR:HEAD"]):
                            for i in range(self.varstacknum):
                                next(f)

                        keyquery.discard("VAR")
                        keyquery.discard("VAR:HEAD")
                        continue

                    if line == "INT":
                        if self.intvarnum > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        self.intvarnum = int(self.__prepare_line(line))

                        if "INT" in keyquery:
                            self.intvar = [0]*self.intvarnum
                            for i in range(self.intvarnum):
                                (linenum, line) = next(f)
                                self.intvar[i] = int(self.__prepare_line(line))

                        elif not keyquery <= set(["INT:HEAD"]):
                            for i in range(self.intvarnum):
                                next(f)

                        keyquery.discard("INT")
                        keyquery.discard("INT:HEAD")
                        continue

                    if line == "PSDCON":
                        if self.psdmapnum > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        self.psdmapnum = int(self.__prepare_line(line))

                        if "PSDCON" in keyquery:
                            self.psdmapdim = [0]*self.psdmapnum
                            for i in range(self.psdmapnum):
                                (linenum, line) = next(f)
                                self.psdmapdim[i] = int(
                                    self.__prepare_line(line))

                        elif not keyquery <= set(["PSDCON:HEAD"]):
                            for i in range(self.psdmapnum):
                                next(f)

                        keyquery.discard("PSDCON")
                        keyquery.discard("PSDCON:HEAD")
                        continue

                    if line == "CON":
                        if self.mapnum > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        buf = self.__prepare_line(line).split(' ')
                        self.mapnum = int(buf[0])
                        self.mapstacknum = int(buf[1])

                        if "CON" in keyquery:
                            self.mapstackdomain = ['']*self.mapstacknum
                            self.mapstackdim = [0]*self.mapstacknum
                            for i in range(self.mapstacknum):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.mapstackdomain[i] = buf[0]
                                self.mapstackdim[i] = int(buf[1])

                        elif not keyquery <= set(["CON:HEAD"]):
                            for i in range(self.mapstacknum):
                                next(f)

                        keyquery.discard("CON")
                        keyquery.discard("CON:HEAD")
                        continue

                    # Unrecognized line. Going to next set of keywords.
                    if line in self.keywords:
                        keyset = self.__inc_keyset(keyset)
                        keyquery = self.__resolve_keyquery_logic(keyquery)
                        keyquery -= self.keywordqueryset[keyset-1]

                        self.simplebounds = (all(x in self.fullkeyquery for x in ['VAR', 'CON'])
                                             and (not self.mapnum or 'BCOORD' in keyquery)
                                             and (not self.mapnum or not self.varnum or 'ACOORD' in keyquery)
                                             and (not self.mapnum or not self.psdvarnum or 'FCOORD' in keyquery))

                        if self.simplebounds:
                            simplemapvaridx = [-1]*self.mapnum
                            simplemapsign = [1.0]*self.mapnum
                            simplemapconst = [0.0]*self.mapnum

                        if len(keyquery) == 0:
                            break
                    else:
                        raise Exception('Keyword not recognized')

                #
                # Keyword set: Data keywords
                #
                if keyset == 2:

                    if line == "OBJFCOORD":
                        if not self.change and self.objfnnz != 0.0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        curnnz = int(self.__prepare_line(line))

                        if "OBJFCOORD" in keyquery:
                            self.objfsubj += [0]*curnnz
                            self.objfsubk += [0]*curnnz
                            self.objfsubl += [0]*curnnz
                            self.objfval += [0.0]*curnnz
                            for i in range(self.objfnnz, self.objfnnz + curnnz):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.objfsubj[i] = int(buf[0])
                                self.objfsubk[i] = int(buf[1])
                                self.objfsubl[i] = int(buf[2])
                                self.objfval[i] = float(buf[3])

                        elif not keyquery <= set(["OBJFCOORD:HEAD"]):
                            for i in range(self.objfnnz, self.objfnnz + curnnz):
                                next(f)

                        self.objfnnz += curnnz
                        keyquery.discard("OBJFCOORD")
                        keyquery.discard("OBJFCOORD:HEAD")
                        continue

                    if line == "OBJACOORD":
                        if not self.change and self.objannz != 0.0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        curnnz = int(self.__prepare_line(line))

                        if "OBJACOORD" in keyquery:
                            self.objasubj += [0]*curnnz
                            self.objaval += [0.0]*curnnz
                            for i in range(self.objannz, self.objannz + curnnz):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.objasubj[i] = int(buf[0])
                                self.objaval[i] = float(buf[1])

                        elif not keyquery <= set(["OBJACOORD:HEAD"]):
                            for i in range(self.objannz, self.objannz + curnnz):
                                next(f)

                        self.objannz += curnnz
                        keyquery.discard("OBJACOORD")
                        keyquery.discard("OBJACOORD:HEAD")
                        continue

                    if line == "OBJBCOORD":
                        if not self.change and self.objbval != 0.0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        self.objbval = float(self.__prepare_line(line))

                        keyquery.discard("OBJBCOORD")
                        keyquery.discard("OBJBCOORD:HEAD")
                        continue

                    if line == "FCOORD":
                        if not self.change and self.fnnz > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        curnnz = int(self.__prepare_line(line))

                        if "FCOORD" in keyquery:
                            self.fsubi += [0]*curnnz
                            self.fsubj += [0]*curnnz
                            self.fsubk += [0]*curnnz
                            self.fsubl += [0]*curnnz
                            self.fval += [0.0]*curnnz
                            for i in range(self.fnnz, self.fnnz + curnnz):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.fsubi[i] = int(buf[0])
                                self.fsubj[i] = int(buf[1])
                                self.fsubk[i] = int(buf[2])
                                self.fsubl[i] = int(buf[3])
                                self.fval[i] = float(buf[4])

                                if self.simplebounds:
                                    simplemapvaridx[self.fsubi[i]] = -2

                        elif not keyquery <= set(["FCOORD:HEAD"]):
                            for i in range(self.fnnz, self.fnnz + curnnz):
                                next(f)

                        self.fnnz += curnnz
                        keyquery.discard("FCOORD")
                        keyquery.discard("FCOORD:HEAD")
                        continue

                    if line == "ACOORD":
                        if not self.change and self.annz > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        curnnz = int(self.__prepare_line(line))

                        if "ACOORD" in keyquery:
                            self.asubi += [0]*curnnz
                            self.asubj += [0]*curnnz
                            self.aval += [0.0]*curnnz
                            for i in range(self.annz, self.annz + curnnz):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.asubi[i] = int(buf[0])
                                self.asubj[i] = int(buf[1])
                                self.aval[i] = float(buf[2])

                                if self.simplebounds:
                                    if abs(self.aval[i]) == 1.0 and simplemapvaridx[self.asubi[i]] == -1:
                                        simplemapvaridx[self.asubi[i]
                                                        ] = self.asubj[i]
                                        simplemapsign[self.asubi[i]
                                                      ] = self.aval[i]
                                    else:
                                        simplemapvaridx[self.asubi[i]] = -2

                        elif not keyquery <= set(["ACOORD:HEAD"]):
                            for i in range(self.annz, self.annz + curnnz):
                                next(f)

                        self.annz += curnnz
                        keyquery.discard("ACOORD")
                        keyquery.discard("ACOORD:HEAD")
                        continue

                    if line == "BCOORD":
                        if not self.change and self.bnnz > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        curnnz = int(self.__prepare_line(line))

                        if "BCOORD" in keyquery:
                            self.bsubi += [0]*curnnz
                            self.bval += [0.0]*curnnz
                            for i in range(self.bnnz, self.bnnz + curnnz):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.bsubi[i] = int(buf[0])
                                self.bval[i] = float(buf[1])

                                if self.simplebounds:
                                    simplemapconst[self.bsubi[i]
                                                   ] = self.bval[i]

                        elif not keyquery <= set(["BCOORD:HEAD"]):
                            for i in range(self.bnnz, self.bnnz + curnnz):
                                next(f)

                        self.bnnz += curnnz
                        keyquery.discard("BCOORD")
                        keyquery.discard("BCOORD:HEAD")
                        continue

                    if line == "HCOORD":
                        if not self.change and self.hnnz > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        curnnz = int(self.__prepare_line(line))

                        if "HCOORD" in keyquery:
                            self.hsubi += [0]*curnnz
                            self.hsubj += [0]*curnnz
                            self.hsubk += [0]*curnnz
                            self.hsubl += [0]*curnnz
                            self.hval += [0.0]*curnnz
                            for i in range(self.hnnz, self.hnnz + curnnz):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.hsubi[i] = int(buf[0])
                                self.hsubj[i] = int(buf[1])
                                self.hsubk[i] = int(buf[2])
                                self.hsubl[i] = int(buf[3])
                                self.hval[i] = float(buf[4])

                        elif not keyquery <= set(["HCOORD:HEAD"]):
                            for i in range(self.hnnz, self.hnnz + curnnz):
                                next(f)

                        self.hnnz += curnnz
                        keyquery.discard("HCOORD")
                        keyquery.discard("HCOORD:HEAD")
                        continue

                    if line == "DCOORD":
                        if not self.change and self.dnnz > 0:
                            raise Exception(
                                'Keyword also found earlier and can only appear once')

                        (linenum, line) = next(f)
                        curnnz = int(self.__prepare_line(line))

                        if "DCOORD" in keyquery:
                            self.dsubi += [0]*curnnz
                            self.dsubk += [0]*curnnz
                            self.dsubl += [0]*curnnz
                            self.dval += [0.0]*curnnz
                            for i in range(self.dnnz, self.dnnz + curnnz):
                                (linenum, line) = next(f)
                                buf = self.__prepare_line(line).split(' ')
                                self.dsubi[i] = int(buf[0])
                                self.dsubk[i] = int(buf[1])
                                self.dsubl[i] = int(buf[2])
                                self.dval[i] = float(buf[3])

                        elif not keyquery <= set(["DCOORD:HEAD"]):
                            for i in range(self.dnnz, self.dnnz + curnnz):
                                next(f)

                        self.dnnz += curnnz
                        keyquery.discard("DCOORD")
                        keyquery.discard("DCOORD:HEAD")
                        continue

                    if line == "CHANGE":
                        self.change = True
                        self.__missing_keyword_scan(keyset)

                        # Stop at current state of variables
                        yield self
                        keyset = 2
                        keyquery = self.fullkeyquery & (
                            self.keywordqueryset[2] | set([None]))
                        continue

                raise Exception('Keyword not recognized')

            #
            # End of file reached at this point
            #
            (linenum, line) = (linenum+1, "")
            if len(keyquery) != 0:
                self.__missing_keyword_scan(keyset)

            # Compute variable bounds when information is available
            if self.simplebounds:
                self.blx = [float("-inf")] * self.varnum
                self.bux = [float("+inf")] * self.varnum

                j = -1
                for k in range(self.varstacknum):
                    for km in range(self.varstackdim[k]):
                        j = j + 1
                        if self.varstackdomain[k] in ['L=', 'L+'] or (self.varstackdomain[k] == 'Q' and km <= 1) or (self.varstackdomain[k] == 'QR' and km <= 2):
                            self.blx[j] = max(self.blx[j], 0.0)
                        if self.varstackdomain[k] in ['L=', 'L-']:
                            self.bux[j] = min(self.bux[j], 0.0)

                i = -1
                for k in range(self.mapstacknum):
                    for km in range(self.mapstackdim[k]):
                        i = i + 1
                        j = simplemapvaridx[i]
                        if j >= 0:
                            if self.mapstackdomain[k] in ['L=', 'L+'] or (self.mapstackdomain[k] == 'Q' and km <= 1) or (self.mapstackdomain[k] == 'QR' and km <= 2):
                                if simplemapsign[i] > 0:
                                    self.blx[j] = max(
                                        self.blx[j], -simplemapconst[i]*simplemapsign[i])
                                else:
                                    self.bux[j] = min(
                                        self.bux[j], -simplemapconst[i]*simplemapsign[i])
                            if self.mapstackdomain[k] in ['L=', 'L-']:
                                if simplemapsign[i] > 0:
                                    self.bux[j] = min(
                                        self.bux[j], -simplemapconst[i]*simplemapsign[i])
                                else:
                                    self.blx[j] = max(
                                        self.blx[j], -simplemapconst[i]*simplemapsign[i])

            # Stop at current state of variables
            yield self

        except Exception as e:
            if isinstance(e, StopIteration):
                msg = 'Unexpected end of file'
            else:
                msg = str(e)

            raise Exception(''.join([
                msg, '. File: ', self.file, '\n',
                str(linenum+1), ': ', line, '\n']))

        finally:
            ff.close()
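# Usage sketch (not part of the excerpt; the enclosing class name and constructor arguments
# are assumptions): the method above is a generator, yielding the parsed problem once per
# CHANGE block and once more when the end of the file is reached, e.g.
#
#     reader = CBFdata("problem.cbf.gz", keyquery)
#     for snapshot in reader.iterator():
#         print(snapshot.varnum, snapshot.mapnum, snapshot.annz)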
Example #48
0
    test_list.append({
        'label': line[0][1:-1],
        'title': line[1][1:-1],
        'description': line[2][1:-1]
    })

# indices
train_index = list(range(len(train_list)))
test_index = list(range(len(train_list), len(train_list) + len(test_list)))
index = {
    'train': train_index,
    'test': test_index
}
assert len(set(index['train']).intersection(index['test'])) == 0

with gzip.open('index.json.gz', mode='wt') as file:
    json.dump(index, file)

all_list = train_list
all_list.extend(test_list)

with gzip.open('data.json.gz', mode='wt') as file:
    json.dump(all_list, file)

# test
with gzip.open('data.json.gz', mode='rt') as file:
    data_list = json.load(file)

with gzip.open('index.json.gz', 'rt') as file:
    index_dict = json.load(file)
    assert len(set(index_dict['train']).intersection(index_dict['test'])) == 0
Example #49
import gzip
import argparse
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE,SIG_DFL)

#st.norm.ppf(q, loc=0, scale=1)

parser = argparse.ArgumentParser(description = "Convert QTLtools output into format suitable for fgwas.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--qtltools", help = "Sorted QTLtools output file.")
parser.add_argument("--perm", help = "QTLtools output from the permutation run.")
parser.add_argument("--annot", help = "Variant annotations for fgwas (sorted by position)")
parser.add_argument("--N", help = "QTL sample size.")
args = parser.parse_args()

#Set up input files
qtltools_file = gzip.open(args.qtltools, 'r')
fgwas_file = gzip.open(args.annot,'r')
perm_file = gzip.open(args.perm,'r')
n_samples = args.N

#Make a directory of phenotypes to be included in the fgwas output
phenotype_dict = dict()
for line in perm_file:
    line = line.decode("utf8").rstrip()
    fields = line.split()
    phenotype_id = fields[5]
    phenotype_dict[phenotype_id] = 1
perm_file.close()

#Make full header
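#fgwas expects these whitespace-separated input columns: SNP id, chromosome, position,
#Z-score, allele frequency (F), sample size (N) and segment number (presumed meaning of the header below)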
header = "SNPID CHR POS Z F N SEGNUMBER"
Example #50
0
									anom = (ma.masked_outside(aData,0,250).compressed() - 125) * .008
									try:
										countryDateAnom[namesDayYear]['level'] = enALS
										countryDateAnom[namesDayYear]['rId'] = regionID
										countryDateAnom[namesDayYear]['lonlatcentroid'] = lonLatCentroid
										countryDateAnom[namesDayYear]['anom'] = (countryDateAnom[namesDayYear]['anom'] + np.mean(anom)) / 2 if 'anom' in countryDateAnom[namesDayYear].keys() else np.mean(anom)
										countryDateAnom[namesDayYear]['anomcount'] = countryDateAnom[namesDayYear]['anomcount'] + len(anom) if 'anomcount' in countryDateAnom[namesDayYear].keys() else len(anom)
									except:
										continue
								# End anomaly loop

								nF = tmp + ndFl[0:-3]	# NDVI .tif file
								if not os.path.isfile(nF):
									freeSpace()
									try:
										with gzip.open(os.path.join(files[ndFl],ndFl),'rb') as nZ:
											with open(nF,'w+b') as nT: nT.write(nZ.read())
									except Exception as gzNf:
										print 'Could not extract', gzNf
										continue

								try:
									nStatsX = getSt(shp,nF,xy)
									nStats = nStatsX[0]
									nMaskStats = nStatsX[1]

									if nStats == ():

										print '! No Data, Skipping this grid point !'
										continue # Will skip to the next year for this day/xy
								except Exception as nStErr:
Example #51
0
def gunzip_file(fname_in, fname_out):
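    # copyfileobj is shutil.copyfileobj: it streams the decompressed bytes to the output
    # file in chunks, so the whole file is never held in memory at once.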
    with gzip.open(fname_in, 'rb') as f_in:
        with open(fname_out, 'wb') as f_out:
            copyfileobj(f_in, f_out)
Example #52
0
    return(name.replace('/', '_'))

files = repo_file_list(bones = False)

spectracount = 0
photocount = 0
eventswithspectra = 0
eventswithphoto = 0

for fcnt, eventfile in enumerate(tqdm(sorted(files, key=lambda s: s.lower()))):
    #if fcnt > 100:
    #    break
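    # os.path.splitext strips only the final extension, so names ending in '.json.gz'
    # still carry a '.json' suffix here, which is dropped explicitly.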
    fileeventname = os.path.splitext(os.path.basename(eventfile))[0].replace('.json','')

    if eventfile.split('.')[-1] == 'gz':
        with gzip.open(eventfile, 'rt') as f:
            filetext = f.read()
    else:
        with open(eventfile, 'r') as f:
            filetext = f.read()

    item = json.loads(filetext, object_pairs_hook=OrderedDict)
    namekey = list(item.keys())[0]
    item = item[namekey]

    if namekey != item['name']:
        tqdm.write(namekey + ' has different name from its key ' + item['name'])

    if 'spectra' in item:
        eventswithspectra += 1
        spectracount += len(item['spectra'])
Example #53
0
    def _iterate(self):
        """iterate over muliple files."""
        def _iter(infile):

            identifier = None

            for line in infile:
                if line.startswith("#"):
                    continue
                if line.startswith(">"):

                    if self.regexIdentifier:
                        try:
                            identifier = re.search(self.regexIdentifier,
                                                   line[1:-1]).groups()[0]
                        except AttributeError:
                            raise ValueError(
                                "could not parse identifier from line %s - check the input"
                                % line[1:-1])
                    else:
                        identifier = re.split(r"\s", line[1:-1])[0]

                else:
                    if not identifier:
                        raise ValueError(
                            "refusing to emit sequence without identifier - check the input"
                        )
                    yield identifier, line.strip()

        for filename in self.filenames:
            if self.format == "tar.gz" or self.format == "tar" or (
                    self.format == "auto" and filename.endswith("tar.gz")):
                if filename == "-":
                    tf = tarfile.open(fileobj=sys.stdin, mode="r|*")
                else:
                    tf = tarfile.open(filename, mode="r")
                for f in tf:
                    b, ext = os.path.splitext(f.name)
                    if ext.lower() in (".fasta", ".fa"):
                        E.info("extracting %s" % f.name)
                        infile = tf.extractfile(f)
                        for x in _iter(infile):
                            yield x
                    else:
                        E.info("skipping %s" % f.name)

                if tf != sys.stdin:
                    tf.close()
                continue
            elif self.format == "fasta.gz" or (self.format == "auto"
                                               and filename.endswith(".gz")):
                infile = gzip.open(filename, "r")
            elif filename == "-":
                infile = sys.stdin
            else:
                infile = open(filename, "r")

            for x in _iter(infile):
                yield x
            if filename != "-":
                infile.close()

        # A bare "raise StopIteration" inside a generator becomes a RuntimeError under
        # PEP 479 (Python 3.7+); simply returning ends the iteration.
        return
Example #54
0
def parse_meta(path):
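    # literal_eval is ast.literal_eval: each line of the gzipped file is expected to hold a
    # single Python literal (e.g. a dict), which is parsed safely without executing code.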
    with gzip.open(path, 'rt') as gz:
        for line in gz:
            yield literal_eval(line)
Example #55
0
def load_object(fname):
    return cPickle.load(gzip.open(fname, "rb"))
Example #56
def lambda_handler(event, context):
    # Attribute bucket and file name/path to variables
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    
    if bucket is None or key is None:
        return

    # Temporary location to save file downloaded from S3
    s3obj = tempfile.NamedTemporaryFile(mode='w+b',delete=False)
 
    # Download file to temp file
    s3.download_file(bucket, key, s3obj.name)

    with gzip.open(s3obj.name, 'rb') as f:
        if ("interface-id" not in f.readline().decode()):
            print("Not VPCFlow, exiting.")
            # return 
        eventcount = 1
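        # Each remaining line is one VPC Flow Log record (default version 2 format:
        # 14 space-separated fields), parsed positionally below.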
        for line in f:
            event_array = line.split()
            event_dict = {}
            # Parse array to dict to prepare for JSON conversion
            event_dict['version'] = event_array[0].decode()
            event_dict['account-id'] = event_array[1].decode()
            event_dict['interface-id'] = event_array[2].decode()
            event_dict['srcaddr'] = event_array[3].decode()
            event_dict['dstaddr'] = event_array[4].decode()
            event_dict['srcport'] = event_array[5].decode()
            event_dict['dstport'] = event_array[6].decode()
            event_dict['protocol'] = event_array[7].decode()
            event_dict['packets'] = event_array[8].decode()
            event_dict['bytes'] = event_array[9].decode()
            event_dict['start'] = event_array[10].decode()
            event_dict['end'] = event_array[11].decode()
            event_dict['action'] = event_array[12].decode()
            event_dict['log-status'] = event_array[13].decode()
            
            # Prepare JSON to send to ES
            data = json.dumps(event_dict).encode('utf-8')
            print(data)
            
            event_date = dt.today().strftime('%Y-%m-%d')
            
            canonical_uri = '/' + indexname + '-' + event_date + '/_doc'
            # url endpoint for our ES cluster
            url = 'https://' + host + canonical_uri
            print( "Event {} url : {}\n".format(eventcount, url))
    
            # aws signed url stuff - for comments on this check their example page linked on top comment
            t = datetime.datetime.utcnow()
            amz_date = t.strftime('%Y%m%dT%H%M%SZ')
            date_stamp = t.strftime('%Y%m%d')
            canonical_querystring = ''
            canonical_headers = 'content-type:' + content_type + '\n' + \
                                'host:' + host + '\n' + \
                                'x-amz-date:' + amz_date + '\n'
            signed_headers = 'content-type;host;x-amz-date'
            payload_hash = hashlib.sha256(data).hexdigest()
            canonical_request = method + '\n' + \
                                canonical_uri + '\n' + \
                                canonical_querystring + '\n' + \
                                canonical_headers + '\n' + \
                                signed_headers + '\n' + \
                                payload_hash
            algorithm = 'AWS4-HMAC-SHA256'
            credential_scope = date_stamp + '/' + region + '/' + service + '/' + 'aws4_request'
            string_to_sign = algorithm + '\n' + \
                             amz_date + '\n' + \
                             credential_scope + '\n' + \
                             hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()
            signing_key = get_signature_key(secret_key, date_stamp, region, service)
            signature = hmac.new(signing_key, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()
            authorization_header = algorithm + ' ' + \
                                   'Credential=' + access_key + '/' + credential_scope + ', ' + \
                                   'SignedHeaders=' + signed_headers + ', ' + \
                                   'Signature=' + signature
            headers = {'Content-Type':content_type,
                       'X-Amz-Date':amz_date,
                       'Authorization':authorization_header, 'X-Amz-Security-Token': session_token}
            
            # sends the json to elasticsearch
            req = requests.post(url, data=data, headers=headers)
            print( "Attempt 0 status code: {}".format(req.status_code))
            print( "response:\n---\n{}\n---\n".format( req.text ))
            
            retry_counter = 1

            """
            If we fail for some reason, we will retry 3 times.
            You will most likely see errors if you're copying a huge amount of logs from an old bucket
            to your new one.

            For normal usage you shouldn't have to worry about this.
            I got it in production with 90 AWS accounts pointing to the same bucket,
            and a pair of m3.mediums on the ES cluster, with 0 errors.

            I don't raise an exception on errors, so as not to miss the other entries in the file or risk
            repeating any inserts done before the error.
            """
            # if the status code is not successful and the retry counter is less than 4
            while (req.status_code != 201) and (retry_counter < 4):
                print( "Got code {}. Retrying {} of 3".format( req.status_code, retry_counter) )
    
                # send the data to ES again
                req = requests.post(url, data=data, headers=headers)
    
                print( "status code: {}".format(req.status_code))
                retry_counter += 1
            eventcount +=1
        
    s3obj.close()
    os.unlink(s3obj.name)
Example #57
            res = ss.run('cli -c "file compress file /var/tmp/showintext"')
            gz_flag = "syntax error" not in res[1]
            logging.warning(f"{ip} does not support 'file compress file /var/tmp/showintext'")
        ss.close()
        # if the session is not telnet, download the archive via SCP; otherwise cat the file and save it into an archive locally
        if not telnet_flag:
            # with SCP(dev, progress=True) as scp:
            with SCP(dev) as scp:
                if gz_flag:
                    scp.get("/var/tmp/showintext.gz", local_path=ip + "showintext.gz")
                else:
                    scp.get("/var/tmp/showintext", local_path=ip + "showintext")
                    fp = open(ip + "showintext", "rb")
                    data = fp.read()
                    bindata = bytearray(data)
                    with gzip.open(ip + "showintext.gz", "wb") as f:
                        f.write(bindata)
                    if os.path.exists(ip + "showintext"):
                        os.remove(ip + "showintext")
                    fp.close()
        else:
            logging.warning(f"{ip}: SCP not available; using cat of /var/tmp/showintext and compressing locally")
            with gzip.open(ip + "showintext.gz", mode="wb") as file:
                dev.timeout = 600
                fs = FS(dev)
                result = fs.cat("/var/tmp/showintext")
                if result:
                    file.write(result.encode())

        # delete the file from the chassis
        if not telnet_flag and gz_flag:
Example #58
0
    def build_devhelp(self, outdir, outname):
        self.info('dumping devhelp index...')

        # Basic info
        root = etree.Element('book',
                             title=self.config.html_title,
                             name=self.config.project,
                             link="index.html",
                             xmlns="http://www.devhelp.net/book",
                             version="2",
                             language="python")
        tree = etree.ElementTree(root)

        # TOC
        chapters = etree.SubElement(root, 'chapters')

        tocdoc = self.env.get_and_resolve_doctree(self.config.master_doc,
                                                  self,
                                                  prune_toctrees=False)

        def write_toc(node, parent):
            if isinstance(node, addnodes.compact_paragraph) or \
                isinstance(node, nodes.bullet_list):
                for subnode in node:
                    write_toc(subnode, parent)
            elif isinstance(node, nodes.list_item):
                item = etree.SubElement(parent, 'sub')
                for subnode in node:
                    write_toc(subnode, item)
            elif isinstance(node, nodes.reference):
                parent.attrib['link'] = node['refuri']
                parent.attrib['name'] = node.astext()

        def istoctree(node):
            return isinstance(node, addnodes.compact_paragraph) and \
                'toctree' in node

        for node in tocdoc.traverse(istoctree):
            write_toc(node, chapters)

        # Index
        functions = etree.SubElement(root, 'functions')
        index = self.env.create_index(self)
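        # create_index() returns the general index grouped by letter: an iterable of
        # (letter, entries) pairs, where each entry unpacks as (title, (refs, subitems)) here.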

        def write_index(title, refs, subitems):
            if len(refs) == 0:
                pass
            elif len(refs) == 1:
                name = title
                xml_type = "function"
                if ' ' in title:
                    func, rest = title.split(' ', 1)
                    if rest.endswith('method)') and func.endswith('()'):
                        complete_class_name = rest.rsplit(' ', 1)[0]
                        canonical_class_name = complete_class_name.rsplit(
                            '.', 1)[1]
                        name = canonical_class_name + '.' + func
                    elif rest.endswith('attribute)'):
                        complete_class_name = rest.rsplit(' ', 1)[0]
                        canonical_class_name = complete_class_name.rsplit(
                            '.', 1)[1]
                        name = canonical_class_name + '.' + func
                        xml_type = "constant"
                etree.SubElement(functions,
                                 'keyword',
                                 type=xml_type,
                                 name=name,
                                 link=refs[0][1])
            else:
                for i, ref in enumerate(refs):
                    etree.SubElement(functions,
                                     'keyword',
                                     type="function",
                                     name="[%d] %s" % (i, title),
                                     link=ref[1])

            if subitems:
                parent_title = re.sub(r'\s*\(.*\)\s*$', '', title)
                for subitem in subitems:
                    write_index("%s %s" % (parent_title, subitem[0]),
                                subitem[1], [])

        for (key, group) in index:
            for title, (refs, subitems) in group:
                write_index(title, refs, subitems)

        # Dump the XML file
        f = gzip.open(os.path.join(outdir, outname + '.devhelp2.gz'), 'w')
        try:
            tree.write(f)
        finally:
            f.close()
Example #59
0
def save_object(fname, obj):
    cPickle.dump(obj, gzip.open(fname, "wb"))
Example #60
-1
    def open_vcf_file(self, path):
        """ Gets a file object for an individual's VCF file.

        Args:
            path: path to VCF file (gzipped or text format).

        Returns:
            A file handle for the VCF file.
        """

        if not os.path.exists(path):
            raise OSError("VCF file not found at: " + path)

        extension = os.path.splitext(path)[1]

        if extension == ".gz":
            # python2 gzip opens in text mode, but the same mode in python3 opens as
            # bytes, so avoid the mismatch with version-specific code
            if IS_PYTHON2:
                handle = gzip.open(path, "r")
            elif IS_PYTHON3:
                handle = gzip.open(path, "rt")
        elif extension in [".vcf", ".txt"]:
            handle = io.open(path, "r", encoding="latin_1")
        else:
            raise OSError("unsupported filetype: " + path)

        return handle
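    # A version-agnostic alternative (a sketch, not from the original source) would be to wrap
    # the binary gzip stream so it exposes a text interface on both Python 2.7 and 3, e.g.
    #
    #     handle = io.TextIOWrapper(io.BufferedReader(gzip.open(path, "rb")),
    #                               encoding="latin_1")
    #
    # which would avoid the IS_PYTHON2/IS_PYTHON3 branch above.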