Example #1
def get_scores(filename, competition_id):
    "Returns (preview_score, score)"

    regex = r'(.+),(\d+\.\d+|\d+)'

    # parse files
    predictions = np.fromregex(filename, regex, [('id', 'S128'),
                                                 ('v0', np.float32)])
    groundtruth_filename = os.path.join(
        app.config['GROUNDTRUTH_FOLDER'],
        Competition.query.get(competition_id).groundtruth)
    groundtruth = np.fromregex(groundtruth_filename, regex,
                               [('id', 'S128'), ('v0', np.float32)])

    # sort data
    predictions.sort(order='id')
    groundtruth.sort(order='id')

    if predictions['id'].size == 0 or not np.array_equal(
            predictions['id'], groundtruth['id']):
        raise ParsingError(
            "Error parsing the submission file. Make sure it has the right format and contains the right ids."
        )

    # partition the data indices into two sets and evaluate separately
    splitpoint = int(np.round(len(groundtruth) * 0.15))
    score_p = roc_auc_score(groundtruth['v0'][:splitpoint],
                            predictions['v0'][:splitpoint])
    score_f = roc_auc_score(groundtruth['v0'][splitpoint:],
                            predictions['v0'][splitpoint:])

    return (score_p, score_f)
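
# A minimal, self-contained sketch of the parsing step used above, assuming the
# submission format implied by the regex (one "id,score" pair per line); the
# Flask app, Competition model and ParsingError used by get_scores are omitted.
from io import StringIO
import numpy as np

sample = StringIO("img_001,0.87\nimg_002,0.13\nimg_003,1\n")
rows = np.fromregex(sample, r'(.+),(\d+\.\d+|\d+)',
                    [('id', 'S128'), ('v0', np.float32)])
rows.sort(order='id')
print(rows['id'], rows['v0'])  # ids as bytes, scores as float32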
Example #2
 def toNumpy(self, regexp, dtypes):
     import numpy as np
     try:
         return np.fromregex(StringIO(self.data), regexp, dtypes)
     except TypeError:
         from PyFoam.ThirdParty.six import BytesIO, b
         return np.fromregex(BytesIO(b(self.data)), regexp, dtypes)
Example #3
def get_ssi_dict(dirpath):
    """save SSI data to a dictionary, where each HDF5 file is an entry"""

    dirlist = sorted(os.listdir(dirpath))
    #    regex = "\s+(\d+[.]\d+)\s+[- ]\d+[.]\d+\s+([- ]\d+[.]\d+)\s+(\d+[.]\d+)\s+(?:\d+[.]\d+e.\d+|\d+[.])\s+(?:\d+[.]\d+e.\d+|\d+[.])\s+(?:\d+[.]\d+e.\d+|\d+[.])\s+\d+[.]\d+e.\d+\s+\d+[.]\d+e.\d+\s+((?:\d+[.]\d+e.\d+|\d+[.]))\s+\S+"
    regex = "\s+(\d+[.]\d+)\s+[- ]\d+[.]\d+\s+([- ]\d+[.]\d+)\s+(\d+[.]\d+)\s+(?:\d+[.]\d+e.\d+|\d+[.])\s+(?:\d+[.]\d+e.\d+|\d+[.])\s+(?:\d+[.]\d+e.\d+|\d+[.])\s+\d+[.]\d+e.\d+\s+\d+[.]\d+e.\d+\s+((?:\d+[.]\d+e.\d+|\d+[.]))\s+((?:\d+[.]\d+e.\d+|\d+[.]))\s+\S+"
    ssi_dict = {}
    for each_filename in dirlist:
        h5_filename = each_filename[:27]
        data = np.fromregex(os.path.join(dirpath, each_filename),
                            regex,
                            dtype={
                                'names': ('alt', 'lat', 'lst', 'o3', 'err'),
                                'formats': (np.float, np.float, np.float,
                                            np.float, np.float)
                            })
        #get ls from header line in file
        ls_data = np.fromregex(os.path.join(dirpath, each_filename),
                               "\s+(\d+.\d+)\s+-\s+L_S", [('ls', np.float)])
        ls = ls_data["ls"][0]

        #not always data!!
        if len(data["alt"]) > 0:
            middle_index = int(len(data["alt"]) / 2)

            ssi_dict[h5_filename] = {
                "ls": ls,
                "lat": data["lat"][middle_index],
                "lst": data["lst"][middle_index],
                "alt": data["alt"],
                "o3": data["o3"],
                "err": data["err"]
            }

    return ssi_dict
Example #4
def get_scores(filename, competition_id):
    "Returns (preview_score, score)"

    regex = r'(\d+),(.+)'

    # parse files
    # filename = "C:\\Users\\jerem\\Documents\\ESTIAM\\UE Datascience\\test_only_labels.csv"
    predictions = np.fromregex(filename, regex, [('id', np.int64),
                                                 ('v0', 'S128')])
    groundtruth_filename = os.path.join(
        app.config['GROUNDTRUTH_FOLDER'],
        Competition.query.get(competition_id).groundtruth)
    groundtruth = np.fromregex(groundtruth_filename, regex, [('id', np.int64),
                                                             ('v0', 'S128')])

    # sort data
    predictions.sort(order='id')
    groundtruth.sort(order='id')

    if predictions['id'].size == 0 or not np.array_equal(
            predictions['id'], groundtruth['id']):
        raise ParsingError("Error parsing the submission file. Make sure it "
                           "has the right format and contains the right ids.")

    # partition the data indices into two sets and evaluate separately
    splitpoint = int(np.round(len(groundtruth) * 0.15))
    score_p = accuracy_score(groundtruth['v0'][:splitpoint],
                             predictions['v0'][:splitpoint])
    score_f = accuracy_score(groundtruth['v0'][splitpoint:],
                             predictions['v0'][splitpoint:])

    return (score_p, score_f)
Example #5
 def parse(self,xmlHeaderFile,quick=True):
     """
     Parse the useful part of the xml header,
     stripping time stamps and non ascii characters
     """
     
     #to strip the non ascii characters
     t = "".join(map(chr, list(range(256))))
     d = "".join(map(chr, list(range(128,256))))
     if sys.version_info > (3, 0):
         trans = str.maketrans('','',d)
         lightXML = io.StringIO()
     else:
         import StringIO
         trans = {ord(c): None for c in d}
         lightXML = StringIO.StringIO()
     
     if not quick:
         #store all time stamps in a big array
         timestamps = np.fromregex(
             xmlHeaderFile,
             r'<TimeStamp HighInteger="(\d+)" LowInteger="(\d+)"/>',
             float
             )
         xmlHeaderFile.seek(0)
         relTimestamps = np.fromregex(
             xmlHeaderFile,
             r'<RelTimeStamp Time="([0-9.]+)" Frame="(\d+)"/>|<RelTimeStamp Frame="[0-9]*" Time="[0-9.]*"/>',
             float
             )
         xmlHeaderFile.seek(0)
         if sys.version_info > (3, 0):
             for line in xmlHeaderFile:
                 lightXML.write(line.translate(trans))
         else:
             for line in xmlHeaderFile:
                 try:
                     lightXML.write(line.translate(t,d))
                 except TypeError:
                     lightXML.write(line.translate(trans))
         
     else:
         #to strip the time stamps
         m = re.compile(
             r'''<TimeStamp HighInteger="[0-9]*" LowInteger="[0-9]*"/>|'''
             +r'''<RelTimeStamp Time="[0-9.]*" Frame="[0-9]*"/>|'''
             +r'''<RelTimeStamp Frame="[0-9]*" Time="[0-9.]*"/>'''
             )
         if sys.version_info > (3, 0):
             for line in xmlHeaderFile:
                 lightXML.write(''.join(m.split(line)).translate(trans))
         else:
             for line in xmlHeaderFile:
                 try:
                     lightXML.write(''.join(m.split(line)).translate(t,d))
                 except TypeError:
                     lightXML.write(''.join(m.split(line)).translate(trans))
     lightXML.seek(0)
     self.xmlHeader = parse(lightXML)
Example #6
 def load_wrl(self, path):
     regexp = r"\s([+-]?[0-9]*[.]?[0-9]+[e]?[+-]?[0-9]*)\s([+-]?[0-9]*[.]?[0-9]+[e]?[+-]?[0-9]*)\s([+-]?[0-9]*[.]?[0-9]+[e]?[+-]?[0-9]*),"
     regexp2 = r"\s([+-]?[0]*[.]?[0-9]+[e]?[+-]?[0-9]*)\s([+-]?[0]*[.]?[0-9]+[e]?[+-]?[0-9]*),"
     regexp3 = r"\s([+-]?[0-9]*),\s([+-]?[0-9]*),\s([+-]?[0-9]*), -1,"
     self.vertices = np.fromregex(path + ".wrl", regexp, ('f'))
     self.uv3d = np.fromregex(path + ".wrl", regexp2, ('f'))
     self.faces = np.fromregex(path + ".wrl", regexp3, ('i'))
     self.image = cv.imread(path + '.bmp')
     [self.h, self.w, _] = self.image.shape
Example #7
def readMaskRegion(regfile):
    box = numpy.fromregex(regfile,r"box\(([0-9]*\.?[0-9]+),(-[0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+)\",([0-9]*\.?[0-9]+)\",([0-9]*\.?[0-9]+)",
                          [('xc',numpy.float),('yc',numpy.float),('width',numpy.float),('height',numpy.float),('angle',numpy.float)])
    try:
        box[0]
    except IndexError:
        print 'Assuming a positive declination region.'
        box = numpy.fromregex(regfile,r"box\(([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+)\",([0-9]*\.?[0-9]+)\",([0-9]*\.?[0-9]+)",[('xc',numpy.float),('yc',numpy.float),('width',numpy.float),('height',numpy.float),('angle',numpy.float)])
        
    return box[0]
Example #8
 def toNumpy(self, regexp, dtypes):
     """Assume that the unparsed data contains line-wise data and transform it to a numpy-array.
     @param regexp: regular expression where the groups correspond to the dtypes,
     @param dtypes: list with dtypes"""
     import numpy as np
     try:
         return np.fromregex(StringIO(self.data), regexp, dtypes)
     except TypeError:
         from PyFoam.ThirdParty.six import BytesIO, b
         return np.fromregex(BytesIO(b(self.data)), regexp, dtypes)
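
# A small usage sketch of toNumpy's underlying call outside PyFoam, assuming
# plain whitespace-separated "time value" lines; one regex group per dtype field.
from io import StringIO
import numpy as np

data = "0.0 1.25\n0.1 1.30\n0.2 1.27\n"
arr = np.fromregex(StringIO(data), r"(\S+)\s+(\S+)",
                   [('time', np.float64), ('value', np.float64)])
print(arr['time'], arr['value'])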
Example #9
def load(filename): # -> (p_vs, faces)
  # Check face index format
  with_normal, f_pattern = check_index_format(filename)

  # Actually parse file via numpy.fromregex
  obj_v  = np.fromregex(filename, *v_pattern)
  obj_f  = np.fromregex(filename, *f_pattern)

  p_vs   = np.stack([obj_v['x'],  obj_v['y'],  obj_v['z']] ).T
  faces  = np.stack([obj_f['v0'], obj_f['v1'], obj_f['v2']]).T - 1
  return p_vs, faces
Example #10
def loadobj(filename, load_normals=False):
    """ load a wavefront obj file
        loads vertices into a (x,y,z) struct array and vertex indices
        into a n x 3 index array 
        only loads obj files vertex positions and also
        only works with triangle meshes """
    vertices = np.fromregex(open(filename), _vertex_regex, np.float)
    if load_normals:
        normals = np.fromregex(open(filename), _normal_regex, np.float)
    triangles = np.fromregex(open(filename), _triangle_regex, np.int) - 1 # 1-based indexing in obj file format!
    if load_normals:
        return vertices, normals, triangles
    else:
        return vertices, triangles
Example #11
def day5():
    """Counting line intersections."""
    crds = np.fromregex(
        "5.txt",
        r"(\d+),(\d+) -> (\d+),(\d+)",
        [("x0", np.int16), ("y0", np.int16), ("x1", np.int16), ("y1", np.int16)],
    )
    grid = np.zeros(
        (max(map(max, crds[["x0", "x1"]])) + 1, max(map(max, crds[["y0", "y1"]])) + 1),
        dtype=np.int16,
    )
    for x0, y0, x1, y1 in crds:
        if x0 == x1 or y0 == y1:  # horiz/vert
            grid[min(y0, y1) : max(y0, y1) + 1, min(x0, x1) : max(x0, x1) + 1] += 1
    res1 = (grid > 1).sum()

    for x0, y0, x1, y1 in crds:
        if abs(x1 - x0) == abs(y1 - y0):  # diag
            yd = 1 if y1 >= y0 else -1
            xd = 1 if x1 >= x0 else -1
            for y, x in zip(range(y0, y1 + yd, yd), range(x0, x1 + xd, xd)):
                grid[y, x] += 1
    res2 = (grid > 1).sum()

    return res1, res2
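
# A quick sketch of the coordinate parse on two made-up lines in the puzzle's
# "x0,y0 -> x1,y1" input format (instead of the real "5.txt").
from io import StringIO
import numpy as np

lines = StringIO("0,9 -> 5,9\n8,0 -> 0,8\n")
crds = np.fromregex(
    lines, r"(\d+),(\d+) -> (\d+),(\d+)",
    [("x0", np.int16), ("y0", np.int16), ("x1", np.int16), ("y1", np.int16)],
)
print(crds["x0"], crds["y1"])  # [0 8] [9 8]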
Example #12
def read_volumes_global_areas(seg_stats_file):
    """Returns the volumes of big global areas such as the ICV, Left/Right hemisphere cortical gray/white matter volume, Subcortical gray matter volume and Supratentorial volume etc.


    Order of the return values is as it appears in the original aseg.stats file (not as mentioned above).
    """

    # Snippet from the relevant part of the aseg.stats
    # Measure lhCortex, lhCortexVol, Left hemisphere cortical gray matter volume, 234615.987869, mm^3
    # Measure rhCortex, rhCortexVol, Right hemisphere cortical gray matter volume, 260948.684264, mm^3
    # Measure Cortex, CortexVol, Total cortical gray matter volume, 495564.672133, mm^3
    # Measure lhCorticalWhiteMatter, lhCorticalWhiteMatterVol, Left hemisphere cortical white matter volume, 222201.531250, mm^3
    # Measure rhCorticalWhiteMatter, rhCorticalWhiteMatterVol, Right hemisphere cortical white matter volume, 232088.671875, mm^3
    # Measure CorticalWhiteMatter, CorticalWhiteMatterVol, Total cortical white matter volume, 454290.203125, mm^3
    # Measure SubCortGray, SubCortGrayVol, Subcortical gray matter volume, 188561.000000, mm^3
    # Measure TotalGray, TotalGrayVol, Total gray matter volume, 684125.672133, mm^3
    # Measure SupraTentorial, SupraTentorialVol, Supratentorial volume, 1046623.140109, mm^3
    # Measure IntraCranialVol, ICV, Intracranial Volume, 1137205.249190, mm^3

    wb_regex_pattern = r'# Measure ([\w/+_\- ]+), ([\w/+_\- ]+), ([\w/+_\- ]+), ([\d\.]+), ([\w/+_\-^]+)'
    datatypes = np.dtype('U100,U100,U100,f8,U10')
    stats = np.fromregex(seg_stats_file, wb_regex_pattern, dtype=datatypes)
    wb_data = np.array([seg[3] for seg in stats])

    return wb_data.flatten()
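
# A quick check of the pattern against two of the aseg.stats lines quoted in the
# comments above, using a file-like object instead of a real stats file.
from io import StringIO
import numpy as np

snippet = StringIO(
    "# Measure lhCortex, lhCortexVol, Left hemisphere cortical gray matter volume, 234615.987869, mm^3\n"
    "# Measure rhCortex, rhCortexVol, Right hemisphere cortical gray matter volume, 260948.684264, mm^3\n")
pattern = r'# Measure ([\w/+_\- ]+), ([\w/+_\- ]+), ([\w/+_\- ]+), ([\d\.]+), ([\w/+_\-^]+)'
stats = np.fromregex(snippet, pattern, dtype=np.dtype('U100,U100,U100,f8,U10'))
print([seg[3] for seg in stats])  # the two volumes in mm^3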
Example #13
def load_sv(fname, format=None):
  if format is None: return np.genfromtxt(fname)

  floatingReString=r'([-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)'
  complexReString =r'\(\s*'+floatingReString+'\s*,\s*'+floatingReString+'\s*\)'

  return np.fromregex(fname,format.replace(r'+',r'\s*').replace('f',floatingReString).replace('c',complexReString),np.float)
Example #14
def parse_x_y_dut1_from_finals_all(f):
    return np.fromregex(f, _R, [
        ('utc_mjd', float),
        ('x_arcseconds', float),
        ('y_arcseconds', float),
        ('dut1', float),
    ])
Example #15
def readheader(catalog):
    '''
    This function extracts the #ttype indexed header of a catalog.
    Input:
    catalog = [string], Name (perhaps including path) of the catalog
        that contains all of the data (e.g. x,y,e1,e2,...). Must include
        ttype header designations for the columns e.g.:
        #ttype0 = objid
        #ttype1 = x
    Output:
    dic = dictionary that contains the {ttype string, column #}.
    '''
    import numpy
    import sys
    header = numpy.fromregex(catalog,r"ttype([0-9]+)(?:\s)?=(?:\s)?(\w+)",
                                 [('column',numpy.int64),('name','S20')])
    # Determine if the catalog is 0 or 1 indexed and if 1 indexed then change to 0
    if header['column'][0] == 1:
        header['column']-=1
    elif header['column'][0] != 0:
        print 'readheader: the catalog is not ttype indexed, please index using format ttype(column#)=(column name), exiting'
        sys.exit()
    for i in range(len(header)):
        if i == 0:
            dic = {header[i][1]:header[i][0]}
        else:
            dic[header[i][1]]=header[i][0]
    return dic
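
# An in-memory illustration of the same header parse (Python 3, file-like input
# rather than a catalog path on disk).
from io import StringIO
import numpy as np

catalog = StringIO("#ttype0 = objid\n#ttype1 = x\n#ttype2 = y\n")
header = np.fromregex(catalog, r"ttype([0-9]+)(?:\s)?=(?:\s)?(\w+)",
                      [('column', np.int64), ('name', 'S20')])
dic = {rec['name']: rec['column'] for rec in header}
print(dic)  # {b'objid': 0, b'x': 1, b'y': 2}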
Example #16
def get_goddard_dict(filepath):
    """save Goddard data to a dictionary, where each HDF5 file is an entry"""

    regex = r"\s+".join([NUM_MATCH1] + [NUM_MATCH] * 2 + [TEXT_MATCH])

    # regex = "\s+(\d+[.]\d+)\s+([- ]\d+[.]\d+)\s+(\d+[.]\d+)\s+(\d+[.]\d+)\s+((?:\d+[.]\d+e.\d+|\d+[.]))\s+((?:\d+[.]\d+e[+]\d+|\d+[.]))\s+(\S+)\s"
    data = np.fromregex(filepath,
                        regex,
                        dtype={
                            'names': ('alt', 'o3', 'o3_error', 'filename'),
                            'formats': (np.float, np.float, np.float, 'U30')
                        })

    filenames = data["filename"].tolist()
    filenames_unique = list(set(filenames))

    goddard_dict = {}
    for each_filename in filenames_unique:
        indices = [
            index for index, value in enumerate(filenames)
            if value == each_filename
        ]
        # middle_index = int(len(indices)/2)
        goddard_dict[each_filename] = {
            # "ls":data["ls"][indices[0]],
            # "lat":data["lat"][indices[middle_index]],
            # "lst":data["lst"][indices[middle_index]],
            "alt": data["alt"][indices],
            "o3": data["o3"][indices],
            "o3_error": data["o3_error"][indices],
        }
    return goddard_dict
Example #17
def parse_x_y_dut1_from_finals_all(f):
    data = np.fromregex(f, _R, [
        ('mjd_utc', np.float32),
        ('x', np.float32),
        ('y', np.float32),
        ('dut1', np.float32),
    ])
    return data['mjd_utc'], data['x'], data['y'], data['dut1']
Example #18
def read_orca_trj(fname):
    """return numpy 2D array
    """
    # http://stackoverflow.com/questions/14645789/
    # numpy-reading-file-with-filtering-lines-on-the-fly
    import numpy as np
    regexp = r'\s+\w+' + r'\s+([-.0-9]+)' * 3 + r'\s*\n'
    return np.fromregex(fname, regexp, dtype='f')
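
# A tiny in-memory example of the same pattern, assuming element-plus-xyz lines
# like those in an ORCA trajectory; a structured dtype is used here for clarity.
from io import StringIO
import numpy as np

xyz = StringIO(" C   0.000   0.000   0.000\n O   1.210   0.000   0.000\n")
regexp = r'\s+\w+' + r'\s+([-.0-9]+)' * 3 + r'\s*\n'
coords = np.fromregex(xyz, regexp, [('x', 'f8'), ('y', 'f8'), ('z', 'f8')])
print(coords['x'], coords['y'], coords['z'])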
Example #19
def readregions(regfile):
    '''
    regfile = (string) the ds9 region file, assumes that it was written using
              'ds9' Format and 'image' Coordinate System

    Currently this function only works on circles, ellipse, and box regions
    '''
    # find all the circle regions
    circ = numpy.fromregex(regfile,r"circle\(([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+)",[('xc',numpy.float),('yc',numpy.float),('rc',numpy.float)])

    # find all the elliptical regions
    ellip = numpy.fromregex(regfile,r"ellipse\(([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+)",[('xc',numpy.float),('yc',numpy.float),('a',numpy.float),('b',numpy.float),('angle',numpy.float)])

    # find all the box regions
    box = numpy.fromregex(regfile,r"box\(([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+)",[('xc',numpy.float),('yc',numpy.float),('width',numpy.float),('height',numpy.float),('angle',numpy.float)])

    return circ, ellip, box
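
# A minimal sketch of the circle branch on a single made-up ds9 region line,
# using a file-like object and plain float fields.
from io import StringIO
import numpy

reg = StringIO("circle(512.0,480.5,25.0)\n")
circ = numpy.fromregex(reg,
                       r"circle\(([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+)",
                       [('xc', float), ('yc', float), ('rc', float)])
print(circ['xc'][0], circ['yc'][0], circ['rc'][0])  # 512.0 480.5 25.0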
Example #20
def get_stocks():
    global gldict
    for name in names:
        filename = "%s.csv" % name
        if not os.path.exists(filename):
            yh_download(name)
        arr = fromregex(filename, yf_regex, yf_dtype)
        gldict[name] = arr
Example #21
	def get_ordered_dict(self, fpath):
		for regex in REGEXES:
			data = np.fromregex(fpath, *regex)
			if len(data): break
		if not len(data): raise Exception('no matches for any regex (%s)' % (fpath,))
		names = set(data['name']) # TEST, TRAIN, TRAIN_BCH, TRAIN_EP, ...
		# pdb.set_trace()
		table = {name: data[data['name'] == name] for name in names}
		return OrderedDict(sorted(table.items(), key=lambda tup: len(tup[1]), reverse=True))
Example #22
    def test_record(self):
        c = StringIO.StringIO()
        c.write('1.312 foo\n1.534 bar\n4.444 qux')
        c.seek(0)

        dt = [('num', np.float64), ('val', 'S3')]
        x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt)
        a = np.array([(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')], dtype=dt)
        assert_array_equal(x, a)
Example #23
    def test_record_3(self):
        c = StringIO.StringIO()
        c.write('1312 foo\n1534 bar\n4444 qux')
        c.seek(0)

        dt = [('num', np.float64)]
        x = np.fromregex(c, r"(\d+)\s+...", dt)
        a = np.array([(1312,), (1534,), (4444,)], dtype=dt)
        assert_array_equal(x, a)
Example #24
def read_tag_data_from_file(tag):

    file = "D:/Descargas/Universidad/TFG/analisis/sensores/" + tag + ".txt"

    return np.fromregex(file,
                        r"POS,(.+),(.+),(.+),(.+)", [('x', np.float64),
                                                     ('y', np.float64),
                                                     ('z', np.float64),
                                                     ('qf', np.int32)])[1:]
Example #25
    def test_record(self):
        c = StringIO()
        c.write(asbytes("1.312 foo\n1.534 bar\n4.444 qux"))
        c.seek(0)

        dt = [("num", np.float64), ("val", "S3")]
        x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt)
        a = np.array([(1.312, "foo"), (1.534, "bar"), (4.444, "qux")], dtype=dt)
        assert_array_equal(x, a)
Example #26
    def test_record_2(self):
        c = StringIO()
        c.write(asbytes("1312 foo\n1534 bar\n4444 qux"))
        c.seek(0)

        dt = [("num", np.int32), ("val", "S3")]
        x = np.fromregex(c, r"(\d+)\s+(...)", dt)
        a = np.array([(1312, "foo"), (1534, "bar"), (4444, "qux")], dtype=dt)
        assert_array_equal(x, a)
Example #27
    def test_record_2(self):
        c = StringIO.StringIO()
        c.write('1312 foo\n1534 bar\n4444 qux')
        c.seek(0)

        dt = [('num', np.int32), ('val', 'S3')]
        x = np.fromregex(c, r"(\d+)\s+(...)", dt)
        a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')], dtype=dt)
        assert_array_equal(x, a)
Example #28
    def test_record_3(self):
        c = StringIO.StringIO()
        c.write('1312 foo\n1534 bar\n4444 qux')
        c.seek(0)

        dt = [('num', np.float64)]
        x = np.fromregex(c, r"(\d+)\s+...", dt)
        a = np.array([(1312, ), (1534, ), (4444, )], dtype=dt)
        assert_array_equal(x, a)
Example #29
def parseSequences(Regexp, Filename, RemoveLineEnding):
    Data = np.fromregex(Filename, Regexp, [('id', object),
                                           ('sequence', object)])
    if RemoveLineEnding == True:
        for i in range(len(Data)):
            #Data[i][1].replace('\n', '').replace('\r', '')
            Data[i][1] = (re.sub(r"[\r\n\s]+", r"", Data[i][1]))
            Data[i][1] = Data[i][1].upper()
    return Data
Example #30
    def test_record_2(self):
        return  # pass this test until #736 is resolved
        c = StringIO.StringIO()
        c.write("1312 foo\n1534 bar\n4444 qux")
        c.seek(0)

        dt = [("num", np.int32), ("val", "S3")]
        x = np.fromregex(c, r"(\d+)\s+(...)", dt)
        a = np.array([(1312, "foo"), (1534, "bar"), (4444, "qux")], dtype=dt)
        assert_array_equal(x, a)
Example #31
 def do_once(self):
     self.file.seek(0)
     output = numpy.fromregex(
         self.file, 
         self.line_re, 
         [('ip', 'S20'), ('day', 'S25'), ('month', 'S20'), ('year', 'S4'), ('time', 'S20'), ('method', 'S7'), ('path', 'S100'), ('size', numpy.int32)]
         )
     total_time_by_month = defaultdict(int)
     for row in output:
         total_time_by_month[(row[2], row[1])] += row[7]
Example #32
    def test_record_2(self):
        c = StringIO.StringIO()
        c.write('1312 foo\n1534 bar\n4444 qux')
        c.seek(0)

        dt = [('num', np.int32), ('val', 'S3')]
        x = np.fromregex(c, r"(\d+)\s+(...)", dt)
        a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')],
                     dtype=dt)
        assert_array_equal(x, a)
Example #33
def load_dataset(csv_file, image_root, fail_on_missing=True):
    """ Loads a dataset .csv file, returning PIDs and FIDs.

    PIDs are the "person IDs", i.e. class names/labels.
    FIDs are the "file IDs", which are individual relative filenames.

    Args:
        csv_file (string, file-like object): The csv data file to load.
        image_root (string): The path to which the image files as stored in the
            csv file are relative to. Used for verification purposes.
            If this is `None`, no verification at all is made.
        fail_on_missing (bool or None): If one or more files from the dataset
            are not present in the `image_root`, either raise an IOError (if
            True) or remove it from the returned dataset (if False).

    Returns:
        (pids, fids) a tuple of numpy string arrays corresponding to the PIDs,
        i.e. the identities/classes/labels and the FIDs, i.e. the filenames.

    Raises:
        IOError if any one file is missing and `fail_on_missing` is True.
    """
    #np.fromregex(csv_file, r'(\d+),"(.+)"', np.object)

    if 'clothing1m' in csv_file:
        dataset = np.fromregex(csv_file, r'(\d+),"(.+)"', np.object)
    else:
        dataset = np.genfromtxt(csv_file, delimiter=',', dtype='|U')

    pids, fids = dataset.T

    # Possibly check if all files exist
    if image_root is not None:
        missing = np.full(len(fids), False, dtype=bool)
        for i, fid in enumerate(fids):
            missing[i] = not os.path.isfile(os.path.join(image_root, fid))

        missing_count = np.sum(missing)
        if missing_count > 0:
            if fail_on_missing:
                raise IOError('Using the `{}` file and `{}` as an image root {}/'
                            '{} images are missing'.format(
                                csv_file, image_root, missing_count, len(fids)))
            else:
                print('[Warning] removing {} missing file(s) from the'
                    ' dataset.'.format(missing_count))
                # We simply remove the missing files.
                fids = fids[np.logical_not(missing)]
                pids = pids[np.logical_not(missing)]

    return pids, fids
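
# A compact sketch of the fromregex branch above on an in-memory CSV, using a
# structured dtype instead of a plain object array; the image_root checks are skipped.
from io import StringIO
import numpy as np

csv = StringIO('0,"images/0/a.jpg"\n0,"images/0/b.jpg"\n7,"images/7/c.jpg"\n')
dataset = np.fromregex(csv, r'(\d+),"(.+)"', [('pid', 'U8'), ('fid', 'U64')])
pids, fids = dataset['pid'], dataset['fid']
print(pids, fids)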
Example #34
def combine_history_statsfiles(cnavgdir):
    statsfiles = glob.glob(cnavgdir + "/" + "HISTORY_STATS*")
    sys.stderr.write("statsfiles: %s\n" % (str(statsfiles)))
    mystats = np.array([])
    mysims = []
    runlens = []
    for statsfile in statsfiles:
        sim = int(re.match(".*HISTORY_STATS_(\d+)", statsfile).group(1))
        mysims.append(sim)
        historystats = np.fromregex(statsfile, r"\((\d+), (\d+)\)\t(\d+)\t(\d+)\t(\d+)\t(\d+)", dtype=int)
        runlens.append(historystats.shape[0])
    runlen = max(runlens)
    for sim in mysims:
        statsfile = os.path.join(cnavgdir, "HISTORY_STATS_%d" % sim)
        historystats = np.fromregex(statsfile, r"\((\d+), (\d+)\)\t(\d+)\t(\d+)\t(\d+)\t(\d+)", dtype=int)
        if mystats.size == 0:
            mystats = np.zeros(((max(mysims) + 1) * runlen, historystats.shape[1] + 1), dtype=int)
        hids = np.array(range(historystats.shape[0])) + sim * Global_BINWIDTH
        i = sim * runlen
        mystats[i : i + runlen, :] = np.hstack((np.atleast_2d(hids).T, historystats))
    return mystats
Example #35
def parseTEM(filename):
    f=open(filename,"r")
    regexp = r"[-+]?[0-9]*\.?[0-9]+"
    res=numpy.fromregex(filename, regexp,[('num', numpy.float), ])
    res2=res['num'][3:].reshape((int(res['num'][1]),6))
    pos,rot = numpy.hsplit(res2, 2)
    M = []#[eulerToMatrix(r) for r in rot]
    for i in range(len(pos)):
        mrot = eulerToMatrix(rot[i])
        mrot[3][:3] = pos[i]
        M.append(mrot)
    return M
Example #36
def readFromFile_CoordsXYZ(FN, forceRead=False):
	# Sanity check filename:
	if not forceRead:
		doRegexMatch(FN,names.pixelCoordsFNRE)
	assert (fileExists(FN))

	# Read (x,y,z) coordinate:
	#dataRegex = myRE.compile(myRE.withinSpaces(myRE.floatREx3))
	dataRegex = myRE.compile(myRE.floatREx3)
	coords = np.fromregex(FN,dataRegex,dtype='f')
	# Return the array
	return coords
Example #37
def main(fname):
    vs = np.fromregex(
        fname,
        "V\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)",
        [("index", np.int32), ("x", np.float64), ("y", np.float64),
         ("z", np.float64)],
    )
    vs = np.array([[i for i in arr] for arr in vs])[:, 1:]
    ts = np.fromregex(
        fname,
        "T\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)",
        [("v0", np.int32), ("v1", np.int32), ("v2", np.int32),
         ("v3", np.int32)],
    )
    ts = np.array([[i for i in arr] for arr in ts]).astype(int)

    print(vs.shape, ts.shape)

    # filter dummy vertices
    ts = ts[np.all((ts < 2) | (ts > 6), axis=1), :]
    plot(vs, ts)
Example #38
def _parse_input_csv(block_offset_size_queue, csv_filepath, numpy_array_queue):
    """Parse CSV file lines to (datetime64[d], userhash, lat, lng) tuples.

    Parameters:

        block_offset_size_queue (multiprocessing.Queue): contains tuples of
            the form (offset, chunk size) to direct where the file should be
            read from
        numpy_array_queue (multiprocessing.Queue): output queue will have
            paths to files that can be opened with numpy.load and contain
            structured arrays of (datetime, userid, lat, lng) parsed from the
            raw CSV file
        csv_filepath (string): path to csv file to parse from

    Returns:
        None
    """
    for file_offset, chunk_size in iter(block_offset_size_queue.get, 'STOP'):
        csv_file = open(csv_filepath, 'r')
        csv_file.seek(file_offset, 0)
        chunk_string = csv_file.read(chunk_size)
        csv_file.close()

        # sample line:
        # 8568090486,48344648@N00,2013-03-17 16:27:27,42.383841,-71.138378,16
        # this pattern matches the above style of line and only parses valid
        # dates to handle some cases where there are weird dates in the input
        pattern = r"[^,]+,([^,]+),((?:19|20)\d\d-(?:0[1-9]|1[012])-(?:0[1-9]|[12][0-9]|3[01])) [^,]+,([^,]+),([^,]+),[^\n]"  # pylint: disable=line-too-long
        try:
            chunk_string = unicode(chunk_string)
        except NameError:
            # Python 3, it's already unicode
            pass
        result = numpy.fromregex(StringIO(chunk_string), pattern,
                                 [('user', 'S40'), ('date', 'datetime64[D]'),
                                  ('lat', 'f4'), ('lng', 'f4')])

        def md5hash(user_string):
            """md5hash userid."""
            return hashlib.md5(user_string).digest()[-4:]

        md5hash_v = numpy.vectorize(md5hash, otypes=['S4'])
        hashes = md5hash_v(result['user'])

        user_day_lng_lat = numpy.empty(hashes.size,
                                       dtype='datetime64[D],a4,f4,f4')
        user_day_lng_lat['f0'] = result['date']
        user_day_lng_lat['f1'] = hashes
        user_day_lng_lat['f2'] = result['lng']
        user_day_lng_lat['f3'] = result['lat']
        numpy_array_queue.put(user_day_lng_lat)
    numpy_array_queue.put('STOP')
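
# A standalone sketch of the parse on the sample line quoted in the comments
# above; the multiprocessing queues and the md5 hashing step are omitted.
from io import StringIO
import numpy

sample = StringIO("8568090486,48344648@N00,2013-03-17 16:27:27,42.383841,-71.138378,16\n")
pattern = r"[^,]+,([^,]+),((?:19|20)\d\d-(?:0[1-9]|1[012])-(?:0[1-9]|[12][0-9]|3[01])) [^,]+,([^,]+),([^,]+),[^\n]"
result = numpy.fromregex(sample, pattern,
                         [('user', 'S40'), ('date', 'datetime64[D]'),
                          ('lat', 'f4'), ('lng', 'f4')])
print(result['user'], result['date'], result['lat'], result['lng'])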
Example #39
    def parse(self,xmlHeaderFile,quick=True):
        """
Parse the useful part of the xml header,
stripping time stamps and non ascii characters
"""
        lightXML = StringIO.StringIO()
        #to strip the non ascii characters
        t = "".join(map(chr, range(256)))
        d = "".join(map(chr, range(128,256)))
        
        if not quick:
            #store all time stamps in a big array
            timestamps = np.fromregex(
                xmlHeaderFile,
                r'<TimeStamp HighInteger="(\d+)" LowInteger="(\d+)"/>',
                float
                )
            xmlHeaderFile.seek(0)
            relTimestamps = np.fromregex(
                xmlHeaderFile,
                r'<RelTimeStamp Time="([0-9.]+)" Frame="(\d+)"/>|<RelTimeStamp Frame="[0-9]*" Time="[0-9.]*"/>',
                float
                )
            xmlHeaderFile.seek(0)
            for line in xmlHeaderFile:
                lightXML.write(line.translate(t,d))
            
        else:
            #to strip the time stamps
            m = re.compile(
                r'''<TimeStamp HighInteger="[0-9]*" LowInteger="[0-9]*"/>|'''
                +r'''<RelTimeStamp Time="[0-9.]*" Frame="[0-9]*"/>|'''
                +r'''<RelTimeStamp Frame="[0-9]*" Time="[0-9.]*"/>'''
                )
            
            for line in xmlHeaderFile:
                lightXML.write(''.join(m.split(line)).translate(t,d))
        lightXML.seek(0)
        self.xmlHeader = parse(lightXML)
Example #40
def load_nakamuraspr(fn):
    """Load a spr file as returned by Nakamura et al.'s alignment methods.

    Fields of the file format as specified in [8]_:
    ID (onset time) (offset time) (spelled pitch) (onset velocity)
    (offset velocity) channel

    These files contain extra information not included in match or corresp files,
    particularly duration and pedal information, and can be used to complement the
    information from the `load_nakamuracorresp` or `load_nakamuramatch`.

    Parameters
    ----------
    fn : str
        The nakamura match.txt-file

    Returns
    -------
    note_array : structured array
        structured array with note information

    References
    ----------
    .. [8] https://midialignment.github.io/MANUAL.pdf


    TODO
    ----
    * Import pedal information
    """
    note_array_dtype = [("onset_sec", "f4"), ("duration_sec", "f4"),
                        ("pitch", "i4"), ("velocity", "i4"), ("channel", "i4"),
                        ("id", "U256")]
    dtype = [("ID", "U256"), ("Ontime", "f"), ("Offtime", "f"),
             ("Sitch", "U256"), ("Onvel", "i"), ("Offvel", "i"),
             ("Channel", "i")]

    pattern = r"(\d+)\t(.+)\t(.+)\t(.+)\t(.+)\t(.+)\t(.+)"

    result = np.fromregex(fn, pattern, dtype=dtype)
    note_array = np.empty(len(result), dtype=note_array_dtype)

    note_array["id"] = result["ID"]
    note_array["onset_sec"] = result["Ontime"]
    note_array["duration_sec"] = result["Offtime"] - result["Ontime"]
    note_array["pitch"] = np.array(
        [note_name_to_midi_pitch(n) for n in result["Sitch"]])
    note_array["velocity"] = result["Onvel"]
    note_array["channel"] = result["Channel"]

    return note_array
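
# A self-contained sketch of the spr parse on two made-up, tab-separated lines
# in the ID/Ontime/Offtime/Sitch/Onvel/Offvel/Channel layout described above
# (note_name_to_midi_pitch and the note_array assembly are omitted).
from io import StringIO
import numpy as np

spr = StringIO("0\t0.500\t1.000\tC4\t64\t0\t0\n1\t0.750\t1.250\tE4\t70\t0\t0\n")
dtype = [("ID", "U256"), ("Ontime", "f"), ("Offtime", "f"),
         ("Sitch", "U256"), ("Onvel", "i"), ("Offvel", "i"), ("Channel", "i")]
rows = np.fromregex(spr, r"(\d+)\t(.+)\t(.+)\t(.+)\t(.+)\t(.+)\t(.+)", dtype=dtype)
print(rows["Sitch"], rows["Offtime"] - rows["Ontime"])  # pitches and durations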
Example #41
File: em.py Project: raj334/typhon
    def estimate_band_coefficients(self, sat=None, instr=None, ch=None):
        """Estimate band coefficients for fast/explicit BT calculations

        In some circumstances, a fully integrated SRF may be more
        expensive than needed.  We can then choose an effective wavelength lambda_c
        along with coefficients alpha, beta such that instead of integrating, we
        estimate R = B(lambda*, T*), with T* = alpha + beta · T_B and lambda* a wavelength
        which may be close to the centroid lambda_c (but there is no
        guarantee).  Such an approximation eliminates the explicit use of
        an integral which can make analysis easier.

        Returns:

            alpha (float): Offset in approximation for T*
            beta (float): Slope in approximation for T*
            lambda_eff (float): Effective wavelength
            delta_alpha (float): Uncertainty in alpha
            delta_beta (float): Uncertainty in beta
            delta_lambda_eff (float): Uncertainty in lambda_eff
        """

        warnings.warn("Obtaining band coefficients from file", UserWarning)
        srcfile = config.conf[instr]["band_file"].format(sat=sat)
        rxp = r"(.{5,6})_ch(\d\d?)_shift([+-]\d+)pm\.nc\s+([\d.]+)\s+(-?[\de\-.]+)\s+([\d.]+)"
        dtp = [("satname", "S6"), ("channel", "u1"), ("shift", "i2"),
               ("centre", "f4"), ("alpha", "f4"), ("beta", "f4")]
        M = numpy.fromregex(srcfile, rxp, dtp).reshape(19, 7)
        dims = ("channel", "shiftno")
        ds = xarray.Dataset(
            {
                "centre": (dims, M["centre"]),
                "alpha": (dims, M["alpha"]),
                "beta": (dims, M["beta"]),
                "shift": (dims, M["shift"])
            },
            coords={"channel": M["channel"][:, 0]})

        ds = ds.sel(channel=ch)

        ds0 = ds.sel(shiftno=0)  # varies 1.1 – 15.2 nm depending on channel
        lambda_c = UADA(ds0["centre"], attrs={"units": "1/cm"})
        alpha = UADA(ds0["alpha"], attrs={"units": "K"})
        beta = UADA(ds0["beta"], attrs={"units": "1"})

        delta_ds = ds.sel(shiftno=1) - ds0
        delta_lambda_c = abs(UADA(delta_ds["centre"], attrs={"units": "1/cm"}))
        delta_alpha = abs(UADA(delta_ds["alpha"], attrs={"units": "K"}))
        delta_beta = abs(UADA(delta_ds["beta"], attrs={"units": "1"}))

        return (alpha, beta, lambda_c, delta_alpha, delta_beta, delta_lambda_c)
Example #42
def main():
  
  args = options()
  path=os.getcwd()
  #folders=open(args.folder, 'r')
  #for l in folders:
  #  fname=l.replace("\n", "")
  #  source=str(path)+"/"+str(args.imgfolder)+"/"+str(fname)
  #  destination=str(path)+"/"+str(args.outdir)
  #  shutil.move(source, destination)
  
  snapshots=str(path)+"/"+str(args.imgfolder)+"/SnapshotInfo.csv"
  #snapshot_data = genfromtxt(snapshots, delimiter=',')
  regex="^.*B*.*$"
  select=np.fromregex(snapshots,regex)
  print select
Example #43
def main():

    args = options()
    path = os.getcwd()
    #folders=open(args.folder, 'r')
    #for l in folders:
    #  fname=l.replace("\n", "")
    #  source=str(path)+"/"+str(args.imgfolder)+"/"+str(fname)
    #  destination=str(path)+"/"+str(args.outdir)
    #  shutil.move(source, destination)

    snapshots = str(path) + "/" + str(args.imgfolder) + "/SnapshotInfo.csv"
    #snapshot_data = genfromtxt(snapshots, delimiter=',')
    regex = "^.*B*.*$"
    select = np.fromregex(snapshots, regex)
    print select
Example #44
def load_kaldi_priors(path, prior_cutoff, uniform_smoothing_scaler=0.05):
    assert 0 <= uniform_smoothing_scaler <= 1.0, (
        "Expected 0 <= uniform_smoothing_scaler <=1, got %f" % uniform_smoothing_scaler
    )
    numbers = np.fromregex(path, r"([\d\.e+]+)", dtype=[('num', np.float32)])
    class_counts = np.asarray(numbers['num'], dtype=theano.config.floatX)
    # compute the uniform smoothing count
    uniform_priors = np.ceil(class_counts.mean() * uniform_smoothing_scaler)
    priors = (class_counts + uniform_priors) / class_counts.sum()
    # floor zeroes to something small so that log() of them is finite rather
    # than -inf (alternatively, these classes could be skipped entirely)
    priors[priors < prior_cutoff] = prior_cutoff
    assert np.all(priors > 0) and np.all(priors <= 1.0), (
        "Prior probabilities outside [0,1] range."
    )
    log_priors = np.log(priors)
    assert not np.any(np.isinf(log_priors)), (
        "Log-priors contain -inf elements."
    )
    return log_priors
Example #45
    def get_dt(self):
        """
        Returns the time step of the simulation.

        Returns
        -------
        A float in seconds.
        """
        data_file_path = self.get_data_path()

        # matches floats and scientific floats
        rg_flt = r"([-\+]?[0-9]+\.[0-9]*[Ee]*[\+-]*[0-9]*)"

        # our UNIT_TIME is scaled to dt
        dt = np.fromregex(
            data_file_path,
            r"\s+UNIT_TIME " +
            rg_flt + r"\n",
            dtype=np.dtype([('dt', 'float')])
        )

        return dt['dt'][0]
Example #46
def apache_time(s):
    year = int(s[7:11])
    month = month_map[s[3:6]]
    day = int(s[0:2])
    hours = int(s[12:14])    
    minutes = int(s[15:17])
    seconds = int(s[18:20])
    timezone = Timezone(s[-5:])
    date_time = datetime.datetime(year, month, day, hours, minutes, seconds, 0, timezone)
    return date_time
         
ary = fromregex('access_log', regex_str,
                [('host', object), 
                 ('user', object),
                 ('time', object),
                 ('request', object),
                 ('status', int16), # I *think* this is guaranteed to be an int32...
                 ('size', int64), # Since this can be '-', it can't be a number.
                 ('referer', object),
                 ('agent', object),
                ], size=size_converter, time=apache_time)
                
#----------------------------------------------------------------------------
# Find all the requests that generated a 404
#----------------------------------------------------------------------------
ary404 = ary[ary['status'] == 404]
ary404['request']
requests404 = set(ary404['request'])

#----------------------------------------------------------------------------
# Generate a report on requests that generated 404 codes.
#----------------------------------------------------------------------------
Example #47
def test_fromregex():
    time.sleep(0.2)
    regexp = r"(\d+)\s+(...)"  # match [digits, whitespace, anything]
    return numpy.fromregex("test_fromregex.dat", regexp, [("num", numpy.int64), ("key", "S3")])
Example #48
        ret_regex += '(' + record[i][1] + '{' + record[i][2] + '})'
    return r'(' + record_type  + ')' + ret_regex + r'\n'

def data_type_list(record):
    ret_val_list = []
    for i in range(0, len(record)):
        ret_val_list.append((record[i][0], record[i][3]))
    return ret_val_list        

#def main():  Take this out while working in ipython
input_file = sys.argv[1]


regexp = regex_string(pvf_weights_rec, PVF_WEIGHTING_REC_TYPE) 
data_types = data_type_list(pvf_weights_rec) 
weights_df = pd.DataFrame(np.fromregex(input_file, regexp, data_types)) 
del weights_df['Padding']

#Get weights for BBC network panel only.
weights_df = weights_df[weights_df.ReportingPanel == BBC_NETWORK_PANEL]

regexp = regex_string(pvf_members_rec, PVF_MEMBER_REC_TYPE) 
data_types = data_type_list(pvf_members_rec) 
members_df = pd.DataFrame(np.fromregex(input_file, regexp, data_types)) 
del members_df['Padding']

regexp = regex_string(pvf_vwg_rec, PVF_VWG_REC_TYPE) 
data_types = data_type_list(pvf_vwg_rec) 
vwg_df = pd.DataFrame(np.fromregex(input_file, regexp, data_types)) 
del vwg_df['Padding']
Example #49
    input = np.array(input, dtype=np.int64)
    score = input.dot(weights_matrix)
    if np.any(score < 0):
        return 0
    else:
        return functools.reduce(operator.mul, score)
        
def score_part2(input, weights_matrix):
    input = np.array(input, dtype=np.int64)
    score = input.dot(weights_matrix)
    if np.any(score < 0):
        return 0
    elif score[-1] != 500:
        return 0
    else:
        return functools.reduce(operator.mul, score[:-1])
        
def partitions():
    for x in range(101):
        for y in range(101-x):
            for z in range((101-x)-y):
                for q in range(((101-x)-y)-z):
                    yield x,y,z,q

regex = r'\w+: capacity (-?\d+), durability (-?\d+), flavor (-?\d+), texture (-?\d+), calories (\d+)'

weights = np.fromregex('input.txt', regex, np.int64)
weights_no_cals = weights[:, :4]

print(max(score(split, weights_no_cals) for split in partitions()))
print(max(score_part2(split, weights) for split in partitions()))
Example #50
                if np.any(new_amounts < 0):
                    continue

                new_score = score(new_amounts, specs)

                if new_score > best_score:
                    best_score = new_score
                    best_amounts = new_amounts

    return (best_amounts, best_score - cur_score)


with open('input', 'r') as f:
    regex = r'.*: capacity (-?\d+), durability (-?\d+), flavor (-?\d+), texture (-?\d+)'
    specs = np.fromregex(f, regex, np.int)

# Start with a decent guess
num_ingredients = len(specs)
total_capacity = 100

start_amount = total_capacity / num_ingredients
amounts = np.ones((1, num_ingredients), np.int) * start_amount
# Correct for the slack
amounts[0, -1] = total_capacity - (start_amount * (num_ingredients - 1))

initial_score = score(amounts, specs)

# Find the best dimensional improvement
(new_amounts, new_score) = best_improvement(amounts, specs)
while new_score > 0:
Example #51
def loadFaceFeatures(path, filename):
    return np.fromregex(path + filename + '.txt', r'.*=(\d+) (\d+)\n', dtype=int)
Example #52
   	-0.100     2.141     5.081\n
   	-0.100     3.282     5.262\n
   	''')
parser.add_argument('filename',help='300.txt')
args = parser.parse_args()

print("~ Filename: {}".format(args.filename))

# fig = plt.figure(num=None, figsize=(12, 6), dpi=120, facecolor='w', edgecolor='k')
# ax = fig.gca(projection='3d')
#       
#        x = np.fromregex(c, r"(\d+)\s+...", dt)
dt = np.dtype([	('X', np.float),
				('Y',  np.float), 
				('Z', np.float)])
data = np.fromregex(args.filename,regexp,dt)
# data = np.genfromtxt(args.filename)
x = data['X']
y = data['Y']
z = data['Z']
# print data

print x
print y
print z

for xv in x:
	for yv in y:
		for zv in z:
			print " ", str(xv) + "  " + str(yv) + "  " + str(zv)
	
Example #53
    properties[properties < 0] = 0
    # We multiply the properties we have for the cookie (tuple style)
    return np.prod(properties[:-1])

# Splits the number of spoons for each possible ingredient
def split(sums, n):
    if sums == 1:
        yield (n, )
    else:
        # being xrange the number of spoons we can have
        for a in range(n+1):
            # Calling the function in the function... WOOOOAH!
            for rest in split(sums-1, n-a):
                # Yield is the same as return, but it returns a Generator 
                # (it only calculates it once)
                yield ((a, ) + rest)

regex = r'\w+: capacity (-?\d+), durability (-?\d+), flavor (-?\d+), texture (-?\d+), calories (\d+)'
# We extract using the regex all the values from the int
ingredients = np.fromregex('input15', regex, np.int)

# max_score = -1e32
# for spoons in split(len(ingredients), 100):
#     max_score = max(score(spoons, ingredients), max_score)
# print 'The best cookie will have a value of {} units.'.format(max_score)

# We calculate the scores given the number of spoons for each ingredient (split spoons for ingredient)
scores = [score(spoons, ingredients) for spoons in split(len(ingredients), 100)]

print 'The best cookie will have a value of {} units.'.format(max(scores))
Example #54
	M, T, intercept = model
	MMinv = la.inv( ## implement (X'X)^{-1} (X'Y)
		np.dot( M.T, M ) ) 
	coef = np.dot( MMinv,
		np.dot( M.T, T ) )
## Estimate the residual standard deviation
	resid = T - np.dot(M, coef)
	dof = len( T ) - len( coef )
	RSS = np.dot( resid.T, resid )
	return (coef, RSS, dof, MMinv )

#####################################################################
#+ 0. Load the data (yes, it is a milestone!)
## Load the word count dataset
wordcount = np.fromregex(
	'./data/wordcounts.txt', r"(\d+)\s+(.{,32})",
	[ ( 'freq', np.int64 ), ( 'word', 'S32' ) ] )


#####################################################################
##+ 1. Check that Zipf's Law holds
## Pre-sort the frequencies: in ascending order of frequencies
wordcount.sort( order = 'freq' )
freqs = wordcount[ 'freq' ]
## Produce ranks: from 1 up to |W|
ranks = np.arange( 1, len( wordcount ) + 1, dtype = float )[::-1]
## The probability of a word frequency being not less than the
##  frequency of a given word w is exactly the ratio of w's rank
##  to the total number of words.
probs = ranks / len( wordcount )
Example #55
mask_z = cat[:,key['z']] < -10
mask_JK = cat[:,key['J']]-cat[:,key['K']] > JK_Rz[0]
mask_Rz = cat[:,key['R']]-cat[:,key['z']] < JK_Rz[1]
mask_color = mask_J+mask_K+mask_z+mask_JK+mask_Rz > 0
Nint = numpy.shape(cat)[0]
cat = cat[mask_color,:]
Nfin = numpy.shape(cat)[0]
Ncut = Nint-Nfin
print 'obsplan: {0} rows were removed from the catalog with {1} initial rows, leaving {2} rows'.format(Ncut,Nint,Nfin)
print 'obsplan: apply the redshift filter'
cat = tools.catfilter(redshift_bounds[0],redshift_bounds[1],cat,key['z_b'])

## region file catalog filter
print 'obsplan: apply the regions filter'
# find all the box regions
box = numpy.fromregex(regfile,r"box\(([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+),([0-9]*\.?[0-9]+)\",([0-9]*\.?[0-9]+)\",([0-9]*\.?[0-9]+)",
                      [('xc',numpy.float),('yc',numpy.float),('width',numpy.float),('height',numpy.float),('angle',numpy.float)])

d2r = numpy.pi/180.0
#loop through the regions creating masks for galaxy inclusion
for i in numpy.arange(numpy.shape(box)[0]):
    #phi is the ccw angle from the +East axis
    xc = box[i][0]
    yc = box[i][1]
    w = box[i][2]
    h = box[i][3]
    phi=box[i][4]*d2r
    #rotate the galaxies into the "primed" (p) region coordinate frame centered
    #at the center of the region
    ra_p = (cat[:,key['ra']]-xc)*numpy.cos(yc*d2r)*numpy.cos(-phi)+(cat[:,key['dec']]-yc)*numpy.sin(-phi)
    dec_p = -(cat[:,key['ra']]-xc)*numpy.cos(yc*d2r)*numpy.sin(-phi)+(cat[:,key['dec']]-yc)*numpy.cos(-phi)
    #determine the min and max bounds of the region
Example #56
    inputPrefix_noType = inputPrefix_noType + inputPrefix[i]
    if i < (len(inputPrefix)-2):
        inputPrefix_noType = inputPrefix_noType + '.'


s='_%s.pdf' % (dim)
outputName = inputPrefix_noType+s


########################
## Load input file #####
########################


regexp = r"([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)"
output = np.fromregex(sys.argv[1], regexp,
	        [('x', np.float), ('z', np.float), ('dens', np.float), ('fx', np.float), ('fz', np.float) ])


X = np.reshape(output['x'],    (26,26))
Y = np.reshape(output['z'],    (26,26))
D = np.reshape(output['dens'], (26,26))
U = np.transpose(np.reshape(output['fx'],   (26,26)))
V = np.transpose(np.reshape(output['fz'],   (26,26)))
for i in range(26):
    for j in range(26):
        if U[i][j] < threshold:
            U[i][j] = 0
        if V[i][j] < threshold:
            V[i][j] = 0

speed = np.sqrt(U*U + V*V)
Example #57
gf_dtype = [('day', 'i2'), ('month', 'S3'), ('year', 'i4'), ('open', 'f4'), ('high', 'f4'), ('low', 'f4'), ('close', 'f4'), ('volume', 'i4')]

yf_regex = r"(\d{4})-(\d{2})-(\d{2}),(\d+[.]\d+),(\d+[.]\d+),(\d+[.]\d+),(\d+[.]\d+),(\d+),(\d+[.]\d+)"
yf_dtype = [('year', 'i4'), ('month', 'i2'), ('day', 'i2'), ('open', 'f4'), ('high', 'f4'), ('low', 'f4'), ('close', 'f4'), ('volume', 'i4'), ('adj_close', 'f4')]


def load_quote(quote, overwrite=False):
    filename = "%s.csv" % quote
    if overwrite is True or \
      (overwrite is False and not os.path.exists(filename)):
        try:
            yh_download(quote)
        except urllib2.HTTPError, e:
            print "Error with %s" % quote
            return None        
    return fromregex(filename, yf_regex, yf_dtype)
    

def old_method():
    names = ['yhoo', 'goog', 'ibm', 'ge', 'msft', 'aapl', 'ebay', 'dell', 'csco', 'siri']
    quotes = dict()
    for name in names:
        arr = load_quote(name)
        quotes[name] = arr
    return quotes

def load_eurostoxx50(overwrite=False):    
    """
    Loads the EuroStoxx 50 index components and returns a dictionary
    """
    stocks = ["ABI.BR", "ACA.PA", "AGN.AS", "AI.PA", "ALO.PA", "ALV.DE",