Example no. 1
def _get_URI(name='GENEVA',ID=None,**kwargs):
    """
    Build GCPD URI from available options.
    
    kwargs are to catch unused arguments.
    
    @param name: photometric system name (e.g. JOHNSON, STROMGREN, GENEVA...)
    @type name: string (automatically uppercased)
    @keyword ID: star name (should be resolvable by SIMBAD)
    @type ID: string
    @return: GCPD URI
    """
    #-- GCPD is poor at recognising aliases: therefore we try different
    #   identifiers retrieved from Sesame that GCPD understands
    recognized_alias = ['HD','BD',"CD"]
    
    try:
        aliases = sesame.search(ID)['alias']
        for alias in aliases:
            if alias[:2] in recognized_alias:
                ID = alias[:2]+' '+alias[2:]
                break
        else:
            logger.error('Star %s has no aliases recognised by GCPD: query will not return results'%(ID))
    except KeyError:
        logger.error('Unknown star %s: GCPD query will not return results'%(ID))
    
    
    base_url = 'http://obswww.unige.ch/gcpd/cgi-bin/photoSys.cgi?phot=%02d&type=original&refer=with&mode=starno&ident=%s'%(systems[name],urllib.quote(ID))
    logger.debug(base_url)
    return base_url
 def __set_object(self,objectname):
     """
     Retrieve coordinates for an object in 'ephem'-style.
     """
     try:
         jpos = sesame.search(objectname,db='S')['jpos']
         logger.info('Found object %s at %s'%(objectname,jpos))
     except KeyError:
         logger.warning('Object %s not found in SIMBAD, trying NED.'%(objectname))
         try:
             jpos = sesame.search(objectname,db='N')['jpos']
         except KeyError:
             logger.warning('Object %s not found in NED either.'%(objectname))
             raise IOError('No coordinates retrieved for object %s.'%(objectname))
     myobject = ephem.readdb("%s,f|M|A0,%s,8.0,2000"%(objectname,','.join(jpos.split())))
     return myobject
def _get_URI(name='GENEVA',ID=None,**kwargs):
    """
    Build GCPD URI from available options.

    kwargs are to catch unused arguments.

    @param name: photometric system name (e.g. JOHNSON, STROMGREN, GENEVA...)
    @type name: string (automatically uppercased)
    @keyword ID: star name (should be resolvable by SIMBAD)
    @type ID: string
    @return: GCPD URI
    """
    #-- GCPD is poor at recognising aliases: therefore we try different
    #   identifiers retrieved from Sesame that GCPD understands
    recognized_alias = ['HD','BD',"CD"]

    try:
        aliases = sesame.search(ID)['alias']
        for alias in aliases:
            if alias[:2] in recognized_alias:
                ID = alias[:2]+' '+alias[2:]
                break
        else:
            logger.error('Star %s has no aliases recognised by GCPD: query will not return results'%(ID))
    except KeyError:
        logger.error('Unknown star %s: GCPD query will not return results'%(ID))


    base_url = 'http://obswww.unige.ch/gcpd/cgi-bin/photoSys.cgi?phot=%02d&type=original&refer=with&mode=starno&ident=%s'%(systems[name],urllib.parse.quote(ID))
    logger.debug(base_url)
    return base_url
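
A minimal usage sketch for the function above (hypothetical: assumes the module-level systems dict, logger and sesame are set up as in the rest of the module; the target name is illustrative):

# Hypothetical usage of _get_URI; requires network access for the Sesame lookup.
url = _get_URI(name='GENEVA', ID='HD 180642')
print(url)
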
 def __set_object(self, objectname):
     """
     Retrieve coordinates for an object in 'ephem'-style.
     """
     try:
         jpos = sesame.search(objectname, db="S")["jpos"]
         logger.info("Found object %s at %s" % (objectname, jpos))
     except KeyError:
         logger.warning("Object %s not found in SIMBAD, trying NED." % (objectname))
         try:
             jpos = sesame.search(objectname, db="N")["jpos"]
         except KeyError:
             logger.warning("Object %s not found in NED either." % (objectname))
             raise IOError("No coordinates retrieved for object %s." % (objectname))
     myobject = ephem.readdb("%s,f|M|A0,%s,8.0,2000" % (objectname, ",".join(jpos.split())))
     return myobject
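
For reference, a minimal sketch of what the 'ephem'-style database entry built above looks like on its own (assumes pyephem is installed; the name and J2000 coordinates are illustrative):

import ephem

# Same xephem database format as used in __set_object: "name,f|M|A0,RA,DEC,mag,epoch"
body = ephem.readdb("Vega,f|M|A0,18:36:56.3,38:47:01,8.0,2000")
body.compute('2009/9/22')        # compute the position for a given date
print(body.a_ra, body.a_dec)     # astrometric RA/DEC
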
Example no. 5
def get_sismo_data(ID):
    """
    Retrieve CoRoT timeseries from a local data repository.
    
    The output record array has fields 'HJD', 'flux', 'e_flux', 'flag'.
    
    @param ID: ID of the target: either an integer (CoRoT ID), a SIMBAD-recognised
    target name, or a valid CoRoT FITS file
    @type ID: int or str
    @return: data, header
    @rtype: numpy recarray, dict
    """
    #-- data on one target can be spread over multiple files: collect the
    #   data
    data = []

    if isinstance(ID, str) and os.path.isfile(ID):
        header = pf.getheader(ID)
        times, flux, error, flags = fits.read_corot(ID)
        data.append([times, flux, error, flags])
    else:
        #-- resolve the target's name: it's either a target name or CoRoT ID.
        try:
            ID = int(ID)
        except ValueError:
            info = sesame.search(ID, db='S')
            IDs = [alias for alias in info['alias'] if 'HD' in alias]
            if len(IDs) != 1:
                logger.error(
                    "Data retrieval for %s not possible. Reason: no HD number resolved"
                    % (ID))
                return
            ID = IDs[0]
        #-- collect the files containing data on the target
        catfiles = config.glob((os.sep).join(['catalogs', 'corot', 'sismo']),
                               '*.fits')
        for catfile in catfiles:
            try:
                header = pf.getheader(catfile)
            except IOError:
                continue
            if header['starname'] == ID or header['corotid'].replace(
                    ' ', '') == '%s' % (ID):
                times, flux, error, flags = fits.read_corot(catfile)
                data.append([times, flux, error, flags])
    #-- now make a record array and sort according to times
    if not data:
        raise ValueError(
            'target {0} not in offline CoRoT data repository'.format(ID))
    data = np.hstack(data)
    data = np.rec.fromarrays(data,
                             dtype=[('HJD', '>f8'), ('flux', '>f8'),
                                    ('e_flux', '>f8'), ('flag', 'i')])
    sa = np.argsort(data['HJD'])
    return data[sa], header
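
A minimal usage sketch (hypothetical: the CoRoT ID is illustrative and must be present in the local data repository):

# Hypothetical usage of get_sismo_data; the returned record array is sorted by 'HJD'.
data, header = get_sismo_data(50170)
print(len(data), data['HJD'][0], data['flux'][0])
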
Example no. 6
 def __set_object(self, objectname):
     """
     Retrieve coordinates for an object in 'ephem'-style.
     """
     try:
         jpos = sesame.search(objectname, db='S')['jpos']
         logger.info('Found object %s at %s' % (objectname, jpos))
     except KeyError:
         logger.warning('Object %s not found in SIMBAD, trying NED.' %
                        (objectname))
         try:
             jpos = sesame.search(objectname, db='N')['jpos']
         except KeyError:
             logger.warning('Object %s not found in NED either.' %
                            (objectname))
             raise IOError('No coordinates retrieved for object %s.' %
                           (objectname))
     myobject = ephem.readdb("%s,f|M|A0,%s,8.0,2000" %
                             (objectname, ','.join(jpos.split())))
     return myobject
Example no. 7
def get_sismo_data(ID):
    """
    Retrieve CoRoT timeseries from a local data repository.
    
    The output record array has fields 'HJD', 'flux', 'e_flux', 'flag'.
    
    @param ID: ID of the target: either an integer (CoRoT ID), a SIMBAD-recognised
    target name, or a valid CoRoT FITS file
    @type ID: int or str
    @return: data, header
    @rtype: numpy recarray, dict
    """
    #-- data on one target can be spread over multiple files: collect the
    #   data
    data = []
        
    if isinstance(ID,str) and os.path.isfile(ID):
        header = pyfits.getheader(ID)
        times,flux,error,flags = fits.read_corot(ID)
        data.append([times,flux,error,flags])
    else:
        #-- resolve the target's name: it's either a target name or CoRoT ID.
        try:
            ID = int(ID)
        except ValueError:
            info = sesame.search(ID,db='S')
            IDs = [alias for alias in info['alias'] if 'HD' in alias]
            if len(IDs)!=1:
                logger.error("Data retrieval for %s not possible. Reason: no HD number resolved" % (ID))
                return
            ID = IDs[0]
        #-- collect the files containing data on the target
        catfiles = config.glob((os.sep).join(['catalogs','corot','sismo']),'*.fits')
        for catfile in catfiles:
            try:
                header = pyfits.getheader(catfile)
            except IOError:
                continue
            if header['starname']==ID or header['corotid'].replace(' ','')=='%s'%(ID):
                times,flux,error,flags = fits.read_corot(catfile)
                data.append([times,flux,error,flags])
    #-- now make a record array and sort according to times
    if not data:
        raise ValueError('target {0} not in offline CoRoT data repository'.format(ID))
    data = np.hstack(data)        
    data = np.rec.fromarrays(data,dtype=[('HJD','>f8'),('flux','>f8'),('e_flux','>f8'),('flag','i')])
    sa = np.argsort(data['HJD'])
    return data[sa],header
Example no. 8
def _get_URI(name='GENEVA', ID=None, **kwargs):
    """
    Build GCPD URI from available options.

    kwargs are to catch unused arguments.

    @param name: photometric system name (e.g. JOHNSON, STROMGREN, GENEVA...)
    @type name: string (automatically uppercased)
    @keyword ID: star name (should be resolvable by SIMBAD)
    @type ID: string
    @return: GCPD URI
    """
    #-- GCPD is poor at recognising aliases: therefore we try different
    #   identifiers retrieved from Sesame that GCPD understands (SA: SAO, HI: HIP/HIC)
    recognized_alias = {'HD': 100, 'SA': 150, "HI": 160, 'PP': 170}
    ubv = False
    # recognized_alias = ['HD','BD',"CD"]
    try:
        aliases = sesame.search(ID)['alias']
        aliases.sort(reverse=True)
        #First check if there is uvby, that directly gives us the ID for GCPD
        for alias in aliases:
            if alias.startswith('uvby98'):
                ID = alias.split('uvby98')[1].strip()
                ubv = True
                break
        #If not, reorder to look for IDs from HD -> SAO
        if not ubv:
            aliases.sort()
            for alias in aliases:
                if alias[:2] in recognized_alias:
                    # ID = alias[:2]+' '+alias[2:]
                    ID = f'{recognized_alias[alias[:2]]}{alias[2:].strip().zfill(6)}'
                    break
            else:
                logger.error(
                    f'Star {ID} has no aliases recognised by GCPD: query will not return results'
                )
    except KeyError:
        logger.error(f'Unknown star {ID}: GCPD query will not return results')

    # base_url = 'http://obswww.unige.ch/gcpd/cgi-bin/photoSys.cgi?phot=%02d&type=original&refer=with&mode=starno&ident=%s'%(systems[name],urllib.parse.quote(ID))
    base_url = 'https://gcpd.physics.muni.cz/cgi-bin/photoSys.cgi?phot=%02d&type=original&refer=with&mode=starno&ident=%s' % (
        systems[name], urllib.parse.quote(ID))
    logger.debug(base_url)
    return base_url
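
For clarity, the ident string built above prefixes the GCPD system code and zero-fills the catalogue number to six digits; a small illustration (the alias is hypothetical):

# Illustration of the ident construction in _get_URI, assuming alias = 'HD 180642':
recognized_alias = {'HD': 100, 'SA': 150, 'HI': 160, 'PP': 170}
alias = 'HD 180642'
ident = f'{recognized_alias[alias[:2]]}{alias[2:].strip().zfill(6)}'
print(ident)  # -> '100180642', the GENEVA-style star number
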
Example no. 9
def galex(**kwargs):
    """
    Cone search for Galex targets.
    """
    ID = kwargs.pop('ID', None)
    if ID is None:
        ra, dec = kwargs.pop('ra'), kwargs.pop('dec')
    else:
        info = sesame.search(ID, db='A')
        if not 'jradeg' in info: return None, None, None
        ra, dec = info['jradeg'], info['jdedeg']
    radius = 0.1
    #radius = radius/60.
    base_url = 'http://galex.stsci.edu/gxws/conesearch/conesearch.asmx/ConeSearchToXml?ra={0:f}&dec={1:f}&sr={2:f}&verb=1'.format(
        ra, dec, radius)
    #base_url = 'http://galex.stsci.edu/GR4/?page=searchresults&RA={ra:f}&DEC={dec:f}&query=no'.format(ra=ra,dec=dec)
    url = urllib.URLopener()
    filen, msg = url.retrieve(base_url, filename=None)
    fuv_flux, e_fuv_flux = None, None
    columns = [
        '_r', 'ra', 'dec', 'fuv_flux', 'fuv_fluxerr', 'nuv_flux', 'nuv_fluxerr'
    ]
    values = [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]

    #flux in microJy
    units = dict(fuv_flux='muJy', nuv_flux='muJy')

    got_target = False
    with open(filen, 'r') as ff:
        for line in ff.readlines():
            for i, col in enumerate(columns):
                if col + '>' in line:
                    values[i] = float(line.split('>')[1].split('<')[0])
                    got_target = (col == 'fuv_fluxerr')
            if got_target:
                break

    values[0] = np.sqrt((values[1] - ra)**2 + (values[2] - dec)**2) * 3600
    columns[1] = '_RAJ2000'
    columns[2] = '_DEJ2000'

    results = np.rec.fromarrays(np.array([values]).T, names=columns)
    if np.all(np.isnan(np.array(values))):
        results = None

    return results, units, None
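
A hedged usage sketch (requires network access; the target name is illustrative):

# Hypothetical usage of galex; returns (record array, units dict, None).
results, units, comments = galex(ID='vega')
if results is not None:
    print(results['fuv_flux'][0], units['fuv_flux'])
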
Example no. 10
def galex(**kwargs):
    """
    Cone search for Galex targets.
    """
    ID = kwargs.pop('ID',None)
    if ID is None:
        ra,dec = kwargs.pop('ra'),kwargs.pop('dec')
    else:
        info = sesame.search(ID,db='A')
        if not 'jradeg' in info: return None,None,None
        ra,dec = info['jradeg'],info['jdedeg']
    radius = 0.1
    #radius = radius/60.
    base_url = 'http://galex.stsci.edu/gxws/conesearch/conesearch.asmx/ConeSearchToXml?ra={0:f}&dec={1:f}&sr={2:f}&verb=1'.format(ra,dec,radius)
    #base_url = 'http://galex.stsci.edu/GR4/?page=searchresults&RA={ra:f}&DEC={dec:f}&query=no'.format(ra=ra,dec=dec)
    url = urllib.URLopener()
    filen,msg = url.retrieve(base_url,filename=None)
    fuv_flux,e_fuv_flux = None,None
    columns = ['_r','ra','dec','fuv_flux','fuv_fluxerr','nuv_flux','nuv_fluxerr']
    values = [np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan]
    
    #flux in microJy
    units = dict(fuv_flux='muJy',nuv_flux='muJy')
    
    got_target = False
    with open(filen,'r') as ff:
        for line in ff.readlines():
            for i,col in enumerate(columns):
                if col+'>' in line:
                    values[i] = float(line.split('>')[1].split('<')[0])
                    got_target = (col=='fuv_fluxerr')
            if got_target: 
                break
    
    values[0] = np.sqrt( (values[1]-ra)**2 + (values[2]-dec)**2)*3600
    columns[1] = '_RAJ2000'
    columns[2] = '_DEJ2000'
    
    results = np.rec.fromarrays(np.array([values]).T,names=columns)
    if np.all(np.isnan(np.array(values))):
        results = None
    
    return results,units,None
Example no. 11
def search(ID, radius=1., filename=None):
    """
    Retrieve datafiles from the Coralie catalogue.

    We search on coordinates, pulled from SIMBAD. If the star ID is not
    recognised, a string search is performed to match the 'targ name' field in the
    FITS headers.

    Only the s1d_A data are searched.

    @param ID: ID of the star, understandable by SIMBAD
    @type ID: str
    @param radius: search radius around the coordinates (arcminutes)
    @type radius: float
    @param filename: write summary to outputfile if not None
    @type filename: str
    @return: record array with summary information on the observations, as well
    as their location (column 'filename')
    @rtype: numpy rec array
    """
    data = ascii.read2recarray(config.get_datafile(
        os.path.join('catalogs', 'coralie'), 'CoralieFullDataOverview.tsv'),
                               splitchar='\t')
    info = sesame.search(ID)
    if info:
        ra, dec = info['jradeg'], info['jdedeg']
        keep = np.sqrt((data['ra'] - ra)**2 +
                       (data['dec'] - dec)**2) < radius / 60.
    else:
        keep = [re.compile(ID).search(objectn) is not None
                for objectn in data['object']]
        keep = np.array(keep)

    data = data[keep]

    logger.info('Found %d spectra' % (len(data)))

    if filename is not None:
        ascii.write_array(data, filename, auto_width=True, header=True)
    else:
        return data
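
A minimal usage sketch (hypothetical: assumes the CoralieFullDataOverview.tsv file is installed; the identifier is illustrative):

# Hypothetical usage of search; returns a record array when filename is None.
obs = search('HD50230', radius=2.)
print(len(obs), obs['filename'][:3])
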
Example no. 12
def get_dss_image(ID,ra=None,dec=None,width=5,height=5):
    """
    Retrieve an image from DSS
    
    plot with
    
    >>> data,coords,size = mast.get_image('HD21389')
    >>> pl.imshow(data[::-1],extent=[coords[0]-size[0]/2,coords[0]+size[0]/2,
    ...                              coords[1]-size[1]/2,coords[1]+size[1]/2])
    """
    #-- set a reasonable timeout
    timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(30.)
    if ra is None or dec is None:
        info = sesame.search(ID)
        ra,dec = info['jradeg'],info['jdedeg']
    url  = urllib.URLopener()
    myurl = "http://archive.stsci.edu/cgi-bin/dss_search?ra=%s&dec=%s&equinox=J2000&height=%s&generation=%s&width=%s&format=FITS"%(ra,dec,height,'2i',width)
    out = url.retrieve(myurl)
    data1 = pf.getdata(out[0])
    #-- reset timeout to original value
    socket.setdefaulttimeout(timeout)
    return data1,(ra,dec),(width/60.,height/60.)
Example no. 13
def search(ID,radius=1.,filename=None):
    """
    Retrieve datafiles from the Coralie catalogue.

    We search on coordinates, pulled from SIMBAD. If the star ID is not
    recognised, a string search is performed to match the 'targ name' field in the
    FITS headers.

    Only the s1d_A data are searched.

    @param ID: ID of the star, understandable by SIMBAD
    @type ID: str
    @param radius: search radius around the coordinates (arcminutes)
    @type radius: float
    @param filename: write summary to outputfile if not None
    @type filename: str
    @return: record array with summary information on the observations, as well
    as their location (column 'filename')
    @rtype: numpy rec array
    """
    data = ascii.read2recarray(config.get_datafile(os.path.join('catalogs','coralie'),'CoralieFullDataOverview.tsv'),splitchar='\t')
    info = sesame.search(ID)
    if info:
        ra,dec = info['jradeg'],info['jdedeg']
        keep = np.sqrt((data['ra']-ra)**2 + (data['dec']-dec)**2) < radius/60.
    else:
        keep = [re.compile(ID).search(objectn) is not None for objectn in data['object']]
        keep = np.array(keep)

    data = data[keep]

    logger.info('Found %d spectra'%(len(data)))

    if filename is not None:
        ascii.write_array(data,filename,auto_width=True,header=True)
    else:
        return data
def getP7Data(ID=None, code=None, include_nans=True):
    """
    Extract P7 timeseries from the catalog.

    WARNING: only B{very} few target IDs can be resolved (HD, HIP, SAO and that's
    about it)

    WARNING: there could be nan's in the data somewhere. If you don't want
    nan's anywhere, set 'include_nans' to False.

    @param ID: target ID (limited!)
    @type ID: str
    @param code: target's GENEVA code (e.g. 100180642 for HD180642)
    @type code: int
    @return: record array containing times (HJD) and corresponding GENEVA mags,
    and a dictionary with header information (only source=P7)
    @rtype: np record array,dict
    """
    if ID is not None:
        if 'HD' not in ID and 'SAO' not in ID and 'HIC' not in ID:
            info = sesame.search(ID)
            print(info)
            if 'alias' in info:
                for alias in info['alias']:
                    if 'HD' in alias:
                        ID = alias
                        break
                    if 'SAO' in alias:
                        ID = alias
                        break
                    if 'HIC' in alias:
                        ID = alias
                        break
        # this should resolve the GENEVA name
        code = _geneva_name_resolver(ID=ID)

    catfile = config.get_datafile('catalogs/p7', 'p7photometry.fits')
    ff = pf.open(catfile)

    valid = ff[1].data.field('CODE') == code
    hjd = ff[1].data.field('HJD')[valid]
    U = ff[1].data.field('U')[valid]
    B = ff[1].data.field('B')[valid]
    B1 = ff[1].data.field('B1')[valid]
    B2 = ff[1].data.field('B2')[valid]
    V = ff[1].data.field('V')[valid]
    V1 = ff[1].data.field('V1')[valid]
    G = ff[1].data.field('G')[valid]
    ff.close()

    data = np.rec.fromarrays([hjd, U, B, B1, B2, V, V1, G],
                             names='HJD,U,B,B1,B2,V,V1,G')

    logger.info('Retrieved %d photometric points from P7' % (len(data)))

    if not include_nans:
        nans = np.isnan(data['HJD'])
        for name in data.dtype.names:
            nans = nans | np.isnan(data[name])
        data = data[~nans]
        logger.info('Keeping %d photometric points without "NaN" from P7' %
                    (len(data)))

    return data, {'source': 'P7'}
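
A minimal usage sketch, reusing the GENEVA code from the docstring (assumes the P7 catalog file is available locally):

# Hypothetical usage of getP7Data; 100180642 is the GENEVA code for HD180642.
data, header = getP7Data(code=100180642, include_nans=False)
print(len(data), header['source'])
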
Example no. 15
def getHipData(ID, dtype='ep', outputFileName=None):
    """
    Retrieve Hipparcos epoch/intermediate photometry from the ESA website.

    The time series together with some header information is stored in a record
    array and dictionary, and optionally written in a specified file.

    The time points are given in barycentric Julian Date and are corrected
    for the offset of 2440000.0 in the original files, but B{only in the output
    record array}. The output files display the B{original contents}.

    For epoch photometry, set C{dtype='ep'}.
    For intermediate data, set C{dtype='i'}.

    For more information:
    C{http://www.rssd.esa.int/SA-general/Projects/Hipparcos/CATALOGUE_VOL1/sect2_05.ps.gz}

    Example:

    >>> data,header = getHipData(1234)
    >>> data = data[data['q_mag'] <= 2]         # keep only the good points

    To write the retrieved data to a file:

    >>> data, header = getHipData(1234, outputFileName="myfile.txt")

    To store the different columns in separate arrays:

    >>> data, header = getHipData(1234)
    >>> time = data['time']
    >>> magnitude = data['mag']
    >>> errorbar = data['e_mag']
    >>> qualityflag = data['q_mag']

    In the case of intermediate data products:
        - orbit: orbit number
        - source: source of abscissa (F=FAST, f=rejected FAST, N=NDAC,n=NDAC rejected)
        - d_acosd: partial derivative wrt alpha cos(delta)
        - d_d: partial derivative wrt delta
        - d_pi: partial derivative wrt parallax
        - d_mua: partial derivative wrt proper motion alpha cos(delta)
        - d_mud: partial derivative wrt proper motion delta

    @param ID: identification of the star: if you give an integer or string that
    can be converted to an integer, it is assumed to be the hipparcos number of
    the star.  E.g. 1234 or "1234". If it is not an integer, the star will
    be resolved via sesame to get the HIP number if possible
    @type ID: integer or string
    @param dtype: data type (epoch ('ep') photometry or intermediate ('i') data)
    @type dtype: string (one of ('ep','i'))
    @param outputFileName: the name of the file that will be created
                           to save the Hipparcos time series
    @type outputFileName: string
    @return: record array with fields time, mag, e_mag (errorbar),
             q_mag (quality flag), and a dictionary containing the
             header information. The header dictionary is of style
             {'HH14': ('A', 'Annex flag (light curves)'), ...}
    @rtype: rec array, dict
    """

    server = "www.rssd.esa.int"
    webpage = "/hipparcos_scripts/HIPcatalogueSearch.pl?hip%sId=" % (dtype)

    # Resolve the name if necessary (i.e., if it's not a HIP number). If the
    # star has no HIP number, log an error and return None
    try:
        hipnr = int(ID)
    except ValueError:
        info = sesame.search(ID, db='S')
        IDs = [alias for alias in info['alias'] if 'HIP' in alias]
        if len(IDs) != 1:
            logger.error(
                "Data retrieval for %s not possible. Reason: no HIP number resolved"
                % (ID))
            return
        hipnr = IDs[0].split(' ')[1]

    # Connect to the website and retrieve the wanted webpage

    conn = http.client.HTTPConnection(server)
    conn.request("GET", webpage + str(hipnr))
    response = conn.getresponse()
    if response.reason != "OK":
        logger.error("Data retrieval for HIP%s not possible. Reason: %s" %
                     (str(hipnr), response.reason))
        return
    else:
        logger.info("Data retrieval for HIP%s: OK" % str(hipnr))

    contents = response.read()
    conn.close()

    # Parse the webpage, to remove the html codes (line starts with <").
    # Put a "#" in front of the header information, and format nicely.
    # Write to the output file if asked for.

    data = []
    header = {}

    if outputFileName:
        outputFile = open(outputFileName, 'w')

    for line in contents.split('\n'):
        if line == "": continue
        if not line.startswith("<"):
            line = line.replace("\r", "")

            # This is the header

            if not line[0].isdigit():
                sline = line.split(':')

                # Only keep header entries of the style
                # "key: value information" in the dictionary

                if len(sline) == 2:
                    key, info = sline
                    info = info.split()
                    header[key] = (info[0], " ".join(info[1:]))
                if outputFileName:
                    line = "# " + line

            # This is the real contents

            else:
                data.append(line.split('|'))
                #-- correct for empty fields
                data[-1] = tuple([(entry.replace(' ', '') == '' and np.nan
                                   or entry) for entry in data[-1]])
            if outputFileName:
                outputFile.write(line + "\n")
    if outputFileName:
        outputFile.close()

    # Make a record array.
    # Choose the header names to be in the VizieR style.

    if dtype == 'ep':
        dtypes = [('time', 'f8'), ('mag', 'f8'), ('e_mag', 'f8'),
                  ('q_mag', 'i')]
    elif dtype == 'i':
        dtypes = [('orbit', 'i'), ('source', 'U1'), ('d_acosd', 'f8'),
                  ('d_d', 'f8'), ('d_pi', 'f8'), ('d_mua', 'f8'),
                  ('d_mud', 'f8'), ('abs_res', 'f8'), ('abs_std', 'f8'),
                  ('cor', 'f8')]
    data = np.rec.array(data, dtype=dtypes)

    # Fix the time offset
    if dtype == 'ep':
        data['time'] += 2440000.0

    return data, header
def get_photometry(ID=None,to_units='erg/s/cm2/AA',extra_fields=[],include=None,
         exclude=None,**kwargs):
    """
    Collect photometry from different sources.

    The output consists of a record array containing the following keys:

    'meas': the measurement's value directly from the catalog in original units
    'e_meas': the error on the measurements
    'flag': any flags that are associated with the measurement in a catalog
    'unit': the unit of the original measurement
    'source': the source catalog's name
    'photband': the photometric passband's name
    'cwave': the effective wavelength of the passband
    'cmeas': converted measurement (to C{to_units})
    'e_cmeas': error on the converted measurement
    'cunit': converted unit

    Be aware that some of the values can be 'nan': e.g. sometimes no error or
    no flag is listed in the catalog. Also, the 'cwave' column will be nan for
    all photometric colours (e.g. B-V).

    If you define C{extra_fields}, make sure all the C{get_photometry} functions
    know how to handle them: probably some default values need to be inserted if
    these extra columns are not available in some catalog. It is safest just to
    leave it blank.

    You can include or exclude search sources via C{include} and C{exclude}.
    When given, these should be a list containing strings. The default is to
    include C{gator}, C{vizier}, C{gcpd} and C{mast}.

    Extra keyword arguments are passed to each C{get_photometry} function in
    this package's modules.

    Example usage:

        1. You want to download all available photometry and write the results to
        an ASCII file for later reference.

        >>> master = get_photometry('vega')
        >>> ascii.write_array(master,header=True,auto_width=True)

        2. You want to plot the raw, unmodelled SED of an object:

        >>> master = get_photometry('vega')
        >>> pl.errorbar(master['cwave'],master['cmeas'],yerr=master['e_cmeas'],fmt='ko')
        >>> pl.gca().set_xscale('log',nonposx='clip')
        >>> pl.gca().set_yscale('log',nonposy='clip')

    We made no distinction between colors (B-V) and magnitudes (V), because the
    'cwave' for colors is 'nan', so they will not be plotted anyway. The final
    two lines are just to correct errorbars that go below zero in a logarithmic
    plot.

    @param ID: the target's name, understandable by SIMBAD
    @type ID: str
    @param to_units: units to convert everything to.
    @type to_units:
    @param include: sources to include
    @type include: list of strings (from C{gator}, C{vizier} or C{gcpd})
    @param exclude: sources to exclude
    @type exclude: list of strings (from C{gator}, C{vizier} or C{gcpd})
    @return: record array where each entry is a photometric measurement
    @rtype: record array
    """
    #-- make sure all catalog names are lower case
    if include is not None: include = [i.lower() for i in include]
    if exclude is not None: exclude = [i.lower() for i in exclude]

    #-- check which sources to include/exclude
    searchables = ['gator','vizier','gcpd','mast']
    if include is not None:
        searchables = include
    if exclude is not None:
        searchables = list( set(searchables)- set(exclude))

    #-- and search photometry
    if 'mast' in searchables:
        kwargs['master'] = mast.get_photometry(ID=ID,to_units=to_units,extra_fields=extra_fields,**kwargs)
    if 'gator' in searchables:
        kwargs['master'] = gator.get_photometry(ID=ID,to_units=to_units,extra_fields=extra_fields,**kwargs)
    if 'vizier' in searchables:
        #-- first query catalogs that can only be queried via HD number
        info = sesame.search(ID=ID,fix=True)
        if 'alias' in info:
            HDnumber = [name for name in info['alias'] if name[:2]=='HD']
            if HDnumber:
                kwargs['master'] = vizier.get_photometry(extra_fields=extra_fields,constraints=['HD=%s'%(HDnumber[0][3:])],sources=['II/83/catalog','V/33/phot'],sort=None,**kwargs)
        #-- then query catalogs that can only be queried via another catalog
        results,units,comms = vizier.search('J/A+A/380/609/table1',ID=ID)
        if results is not None:
            catname = results[0]['Name'].strip()
            kwargs['master'] = vizier.get_photometry(take_mean=True,extra_fields=extra_fields,constraints=['Name={0}'.format(catname)],sources=['J/A+A/380/609/table{0}'.format(tnr) for tnr in range(2,5)],sort=None,**kwargs)
        #-- then query normal catalogs
        kwargs['master'] = vizier.get_photometry(ID=ID,to_units=to_units,extra_fields=extra_fields,**kwargs)


    if 'gcpd' in searchables:
        kwargs['master'] = gcpd.get_photometry(ID=ID,to_units=to_units,extra_fields=extra_fields,**kwargs)
    master = kwargs['master']

    #-- now make a summary of the contents:
    photbands = [phot.split('.')[0]  for phot in master['photband']]
    contents = [(i,photbands.count(i)) for i in sorted(list(set(photbands)))]
    for phot in contents:
        logger.info('%10s: found %d measurements'%phot)
    return master
Example no. 17
def getP7Data(ID=None,code=None,include_nans=True):
    """
    Extract P7 timeseries from the catalog.
    
    WARNING: only B{very} few target IDs can be resolved (HD, HIP, SAO and that's
    about it)
    
    WARNING: there could be nan's in the data somewhere. If you don't want
    nan's anywhere, set 'include_nans' to False.
    
    @param ID: target ID (limited!)
    @type ID: str
    @param code: target's GENEVA code (e.g. 100180642 for HD180642)
    @type code: int
    @return: record array containing times (HJD) and corresponding GENEVA mags,
    and a dictionary with header information (only source=P7)
    @rtype: np record array,dict
    """
    if ID is not None:
        if 'HD' not in ID and 'SAO' not in ID and 'HIC' not in ID:
            info = sesame.search(ID)
            print(info)
            if 'alias' in info:
                for alias in info['alias']:
                    if 'HD' in alias:
                        ID = alias
                        break
                    if 'SAO' in alias:
                        ID = alias
                        break
                    if 'HIC' in alias:
                        ID = alias
                        break
        # this should resolve the GENEVA name
        code = _geneva_name_resolver(ID=ID)
        
    catfile = config.get_datafile('catalogs/p7','p7photometry.fits')
    ff = pf.open(catfile)

    valid = ff[1].data.field('CODE')==code
    hjd = ff[1].data.field('HJD')[valid]
    U = ff[1].data.field('U')[valid]
    B = ff[1].data.field('B')[valid]
    B1 = ff[1].data.field('B1')[valid]
    B2 = ff[1].data.field('B2')[valid]
    V = ff[1].data.field('V')[valid]
    V1 = ff[1].data.field('V1')[valid]
    G = ff[1].data.field('G')[valid]
    ff.close()
    
    data = np.rec.fromarrays([hjd,U,B,B1,B2,V,V1,G],names='HJD,U,B,B1,B2,V,V1,G')
    
    logger.info('Retrieved %d photometric points from P7'%(len(data)))
    
    if not include_nans:
        nans = np.isnan(data['HJD'])
        for name in data.dtype.names:
            nans = nans | np.isnan(data[name])
        data = data[~nans]
        logger.info('Keeping %d photometric points without "NaN" from P7'%(len(data)))
    
    return data,{'source':'P7'}
Example no. 18
def search(ID=None,time_range=None,prog_ID=None,data_type='cosmicsremoved_log',
           radius=1.,filename=None):
    """
    Retrieve datafiles from the Hermes catalogue.

    B{If C{ID} is given}: A string search is performed to match the 'object'
    field in the FITS headers. The coordinates are pulled from SIMBAD. If the
    star ID is recognised by SIMBAD, an additional search is done based only on
    the coordinates. The union of both searches is the final result.

    B{If C{time_range} is given}: The search is confined within the defined
    range. If you only give one day, the search is confined to the observations
    made during the night starting at that day. If C{ID} is not given, all
    observations will be returned of the given datatype.

    B{If C{prog_ID} is given}: The search is performed to match the number of
    the program. Individual stars are not queried in SIMBAD, so any information
    that is missing in the header will not be corrected.

    If you don't give either ID or time_range, the info on all data will be
    returned. This is a huge amount of data, so it can take a while before it
    is returned. Remember that the header of each spectrum is read in and checked.

    Data type can be any of:
        1. cosmicsremoved_log: return log merged without cosmics
        2. cosmicsremoved_wavelength: return wavelength merged without cosmics
        3. ext_log: return log merged with cosmics
        4. ext_wavelength: return wavelength merged with cosmics
        5. raw: raw files (also TECH..., i.e. any file in the raw directory)

    This function needs a C{HermesFullDataOverview.tsv} file located in one
    of the datadirectories from C{config.py}, and subdirectory C{catalogs/hermes}.

    If this file does not exist, you can create it with L{make_data_overview}.

    If you want a summary file with the data you search for, you can give
    C{filename} as an extra keyword argument. The results will be saved to that
    file.

    The columns in the returned record array are listed in L{make_data_overview},
    but are repeated here (upper-case names are directly retrieved from the
    FITS header, lower-case names are calculated values; the real header
    keywords are all lower case):

        1.  UNSEQ
        2.  PROG_ID
        3.  OBSMODE
        4.  BVCOR
        5.  OBSERVER
        6.  OBJECT
        7.  RA
        8.  DEC
        9.  BJD
        10. EXPTIME
        11. PMTOTAL
        12. DATE-AVG
        13. OBJECT
        14. airmass
        15. filename

    The column C{filename} contains a string with the absolute location of the
    file. If you need any extra information from the header, you can easily
    retrieve it.

    If BVCOR or BJD are not available from the FITS header, this function will
    attempt to calculate it. It will not succeed if the object's name is not
    recognised by SIMBAD.

    Example usage: retrieve all data on HD50230

    >>> mydata = search('HD50230')

    Keep only those with a long enough exposure time:

    >>> myselection = mydata[mydata['exptime']>500]

    Look up the 'telalt' value in the FITS headers of all these files via a fast
    list comprehension:

    >>> telalts = [pf.getheader(fname)['telalt'] for fname in myselection['filename']]

    Search for all data of HD50230 taken in the night of 22 September 2009:

    >>> data = hermes.search('HD50230',time_range='2009-9-22')

    Or within an interval of a few days:

    >>> data = hermes.search('HD50230',time_range=('2009-9-23','2009-9-30'))

    Search for all data observed in a given night:

    >>> data = hermes.search(time_range='2009-9-22')

    B{Warning:} the heliocentric correction is not calculated when no ID is given,
    so make sure it is present in the header if you need it, or calculate it yourself.

    @param ID: ID of the star, understandable by SIMBAD
    @type ID: str
    @param time_range: range of dates to confine the search to
    @type time_range: tuple of strings of the form '2009-09-23T04:24:35.712556' or '2009-09-23'
    @param data_type: if None, all data will be returned. Otherwise, subset
    'cosmicsremoved', 'merged' or 'raw'
    @type data_type: str
    @param radius: search radius around the coordinates (arcminutes)
    @type radius: float
    @param filename: write summary to outputfile if not None
    @type filename: str
    @return: record array with summary information on the observations, as well
    as their location (column 'filename')
    @rtype: numpy rec array
    """
    #-- read in the data from the overview file, and get SIMBAD information
    #   of the star
    ctlFile = '/STER/mercator/hermes/HermesFullDataOverview.tsv'
    data = ascii.read2recarray(ctlFile, splitchar='\t')
    #data = ascii.read2recarray(config.get_datafile(os.path.join('catalogs','hermes'),'HermesFullDataOverview.tsv'),splitchar='\t')
    keep = np.array(np.ones(len(data)),bool)
    #-- confined search within given time range
    if time_range is not None:
        if isinstance(time_range,str):
            time_range = _timestamp2datetime(time_range)
            time_range = (time_range,time_range+datetime.timedelta(days=1))
        else:
            time_range = (_timestamp2datetime(time_range[0]),_timestamp2datetime(time_range[1]))
        keep = keep & np.array([(time_range[0]<=_timestamp2datetime(i)<=time_range[1]) for i in data['date-avg']],bool)
        info = None


    #-- search on ID
    if ID is not None:
        info = sesame.search(ID)

        #-- first search on object name only
        ID = ID.replace(' ','').replace('.','').replace('+','').replace('-','').replace('*','')
        match_names = np.array([objectn.replace(' ','').replace('.','').replace('+','').replace('-','').replace('*','') for objectn in data['object']],str)
        keep_id = [bool(((ID in objectn) or (objectn in ID)) and len(objectn)) for objectn in match_names]
        keep_id = np.array(keep_id)
        #   if we found the star on SIMBAD, we use its RA and DEC to match the star
        if info:
            ra,dec = info['jradeg'],info['jdedeg']
            keep_id = keep_id | (np.sqrt((data['ra']-ra)**2 + (data['dec']-dec)**2) < radius/60.)
        keep = keep & keep_id

    if prog_ID is not None:
        keep = keep & (data['prog_id']==prog_ID)

    #-- if some data is found, we check if the C{data_type} string is contained
    #   in the file's name. If not, we remove it.
    if np.any(keep):
        data = data[keep]

        if data_type is not None:
            data_type = data_type.lower()
            #-- now derive the location of the 'data_type' types from the raw
            #   files
            if not data_type=='raw':
                data['filename'] = [_derive_filelocation_from_raw(ff,data_type) for ff in data['filename']]
                existing_files = np.array([ff!='naf' for ff in data['filename']],bool)
                data = data[existing_files]
        seqs = sorted(set(data['unseq']))
        logger.info('ID={}/prog_ID={}: Found {:d} spectra (data type={} with unique unseqs)'.format(ID,prog_ID,len(seqs),data_type))
    else:
        data = data[:0]
        logger.info('%s: Found no spectra'%(ID))

    #-- we now check if the barycentric correction was calculated properly.
    #   If not, we calculate it here, but only if the object was found in
    #   SIMBAD. Else, we have no information on the ra and dec (if bvcorr was
    #   not calculated, ra and dec are not in the header).
    for obs in data:
        if ID is not None and info:
            try:
                jd  = _timestamp2jd(obs['date-avg'])
            except ValueError:
                logger.info('Header probably corrupted for unseq {}: no info on time or barycentric correction'.format(obs['unseq']))
                jd = np.nan
            # the previous line is equivalent to:
            # day = dateutil.parser.parse(header['DATE-AVG'])
            # BJD = ephem.julian_date(day)
            bvcorr, hjd = helcorr(ra/360.*24, dec, jd)
        else:
            break
        if np.isnan(obs['bvcor']):
            logger.info("Corrected 'bvcor' for unseq {} (missing in header)".format(obs['unseq']))
            obs['bvcor'] = float(bvcorr)
        if np.isnan(obs['bjd']):
            logger.info("Corrected 'bjd' for unseq {} (missing in header)".format(obs['unseq']))
            obs['bjd'] = float(hjd)


    #-- do we need the information as a file, or as a numpy array?
    if filename is not None:
        ascii.write_array(data,filename,auto_width=True,header=True)
    else:
        return data
Example no. 19
def get_photometry(ID=None,to_units='erg/s/cm2/AA',extra_fields=[],include=None,
         exclude=None,**kwargs):
    """
    Collect photometry from different sources.

    The output consists of a record array containing the following keys:

    'meas': the measurement's value directly from the catalog in original units
    'e_meas': the error on the measurements
    'flag': any flags that are associated with the measurement in a catalog
    'unit': the unit of the original measurement
    'source': the source catalog's name
    'photband': the photometric passband's name
    'cwave': the effective wavelength of the passband
    'cmeas': converted measurement (to C{to_units})
    'e_cmeas': error on the converted measurement
    'cunit': converted unit

    Be aware that some of the values can be 'nan': e.g. sometimes no error or
    no flag is listed in the catalog. Also, the 'cwave' column will be nan for
    all photometric colours (e.g. B-V).

    If you define C{extra_fields}, make sure all the C{get_photometry} functions
    know how to handle them: probably some default values need to be inserted if
    these extra columns are not available in some catalog. It is safest just to
    leave it blank.

    You can include or exclude search sources via C{include} and C{exclude}.
    When given, these should be a list containing strings. The default is to
    include C{gator}, C{vizier}, C{gcpd} and C{mast}.

    Extra keyword arguments are passed to each C{get_photometry} function in
    this package's modules.

    Example usage:

        1. You want to download all available photometry and write the results to
        an ASCII file for later reference.

        >>> master = get_photometry('vega')
        >>> ascii.write_array(master,header=True,auto_width=True)

        2. You want to plot the raw, unmodelled SED of an object:

        >>> master = get_photometry('vega')
        >>> pl.errorbar(master['cwave'],master['cmeas'],yerr=master['e_cmeas'],fmt='ko')
        >>> pl.gca().set_xscale('log',nonposx='clip')
        >>> pl.gca().set_yscale('log',nonposy='clip')

    We made no distinction between colors (B-V) and magnitudes (V), because the
    'cwave' for colors is 'nan', so they will not be plotted anyway. The final
    two lines are just to correct errorbars that go below zero in a logarithmic
    plot.

    @param ID: the target's name, understandable by SIMBAD
    @type ID: str
    @param to_units: units to convert everything to.
    @type to_units:
    @param include: sources to include
    @type include: list of strings (from C{gator}, C{vizier} or C{gcpd})
    @param exclude: sources to exclude
    @type exclude: list of strings (from C{gator}, C{vizier} or C{gcpd})
    @return: record array where each entry is a photometric measurement
    @rtype: record array
    """
    #-- make sure all catalog names are lower case
    if include is not None: include = [i.lower() for i in include]
    if exclude is not None: exclude = [i.lower() for i in exclude]

    #-- check which sources to include/exclude
    searchables = ['gator','vizier','gcpd','mast']
    if include is not None:
        searchables = include
    if exclude is not None:
        searchables = list( set(searchables)- set(exclude))

    #-- and search photometry
    if 'mast' in searchables:
        kwargs['master'] = mast.get_photometry(ID=ID,to_units=to_units,extra_fields=extra_fields,**kwargs)
    if 'gator' in searchables:
        kwargs['master'] = gator.get_photometry(ID=ID,to_units=to_units,extra_fields=extra_fields,**kwargs)
    if 'vizier' in searchables:
        #-- first query catalogs that can only be queried via HD number
        info = sesame.search(ID=ID,fix=True)
        if 'alias' in info:
            HDnumber = [name for name in info['alias'] if name[:2]=='HD']
            if HDnumber:
                kwargs['master'] = vizier.get_photometry(extra_fields=extra_fields,constraints=['HD=%s'%(HDnumber[0][3:])],sources=['II/83/catalog','V/33/phot'],sort=None,**kwargs)
        #-- then query catalogs that can only be queried via another catalog
        results,units,comms = vizier.search('J/A+A/380/609/table1',ID=ID)
        if results is not None:
            catname = results[0]['Name'].strip()
            kwargs['master'] = vizier.get_photometry(take_mean=True,extra_fields=extra_fields,constraints=['Name={0}'.format(catname)],sources=['J/A+A/380/609/table{0}'.format(tnr) for tnr in range(2,5)],sort=None,**kwargs)
        #-- then query normal catalogs
        kwargs['master'] = vizier.get_photometry(ID=ID,to_units=to_units,extra_fields=extra_fields,**kwargs)


    if 'gcpd' in searchables:
        kwargs['master'] = gcpd.get_photometry(ID=ID,to_units=to_units,extra_fields=extra_fields,**kwargs)
    master = kwargs['master']

    #-- now make a summary of the contents:
    photbands = [phot.split('.')[0]  for phot in master['photband']]
    contents = [(i,photbands.count(i)) for i in sorted(list(set(photbands)))]
    for phot in contents:
        logger.info('%10s: found %d measurements'%phot)
    return master
Example no. 20
     bvcor = "%11.6f" % float(header['BVCOR'])
 except:
     bvcor = " "
 try:
     bjd = "%14.6f" % float(header['BJD'])
 except:
     bjd = " "
 #
 if obsOnly:
     line = unseq + "\t" + starname + "\t" + program + "\t" + date + "\t" + str(airmass) + "\t" + str(exptime) + "\t" + \
       ra + "\t" + dec + "\t" + observer + "\t" + obsmode + "\t" + bvcor + "\t" + bjd + "\t" + \
       str(usn) + "\t" + str(usignal1sec) + "\t" + str(bsn) + "\t" + str(bsignal1sec) + "\t" + str(vsn) + "\t" + \
       str(vsignal1sec)+  "\t" + str(rsn) + "\t" + str(rsignal1sec)+ "\t"+  str(isn) + "\t" + str(isignal1sec) + "\t" + str(pmtotal)+ "\t" + namefits[i] + "\n"
     overview.write(line)
 else:
     target = sesame.search(starname)
     if target:
         alias = target["alias"]
         #print target["Vel"]["v"]
         try:
             hd = [n for n in alias if n[:2] == "HD"][0]
         except:
             hd = " "
         try:
             hip = [n for n in alias if n[:3] == "HIP"][0]
         except:
             hip = " "
         try:
             Mv = float(target['mag']['V']['v'])
             vref = 10.**(-0.4 * Mv)
             skyq = "%7.5f" % (float(vsignal1sec) / vref / norm)
def getHipData(ID,dtype='ep',outputFileName=None):

    """
    Retrieve Hipparcos epoch/intermediate photometry from the ESA website.
    
    The time series together with some header information is stored in a record
    array and dictionary, and optionally written in a specified file.
    
    The time points are given in barycentric Julian Date and are corrected
    for the offset of 2440000.0 in the original files, but B{only in the output
    record array}. The output files display the B{original contents}.
    
    For epoch photometry, set C{dtype='ep'}.
    For intermediate data, set C{dtype='i'}.
    
    For more information:
    C{http://www.rssd.esa.int/SA-general/Projects/Hipparcos/CATALOGUE_VOL1/sect2_05.ps.gz}
    
    Example:
    
    >>> data,header = getHipData(1234)
    >>> data = data[data['q_mag'] <= 2]         # keep only the good points
    
    To write the retrieved data to a file:
    
    >>> data, header = getHipData(1234, outputFileName="myfile.txt")
    
    To store the different columns in separate arrays:
    
    >>> data, header = getHipData(1234)
    >>> time = data['time']
    >>> magnitude = data['mag']
    >>> errorbar = data['e_mag']
    >>> qualityflag = data['q_mag']
    
    In the case of intermediate data products:
        - orbit: orbit number
        - source: source of abscissa (F=FAST, f=rejected FAST, N=NDAC,n=NDAC rejected)
        - d_acosd: partial derivative wrt alpha cos(delta)
        - d_d: partial derivative wrt delta
        - d_pi: partial derivative wrt parallax
        - d_mua: partial derivative wrt proper motion alpha cos(delta)
        - d_mud: partial derivative wrt proper motion delta
    
    @param ID: identification of the star: if you give an integer or string that
    can be converted to an integer, it is assumed to be the hipparcos number of
    the star.  E.g. 1234 or "1234". If it is not an integer, the star will
    be resolved via sesame to get the HIP number if possible
    @type ID: integer or string
    @param dtype: data type (epoch ('ep') photometry or intermediate ('i') data)
    @type dtype: string (one of ('ep','i'))
    @param outputFileName: the name of the file that will be created
                           to save the Hipparcos time series
    @type outputFileName: string
    @return: record array with fields time, mag, e_mag (errorbar), 
             q_mag (quality flag), and a dictionary containing the 
             header information. The header dictionary is of style
             {'HH14': ('A', 'Annex flag (light curves)'), ...}
    @rtype: rec array, dict
    """

    server = "www.rssd.esa.int"
    webpage = "/hipparcos_scripts/HIPcatalogueSearch.pl?hip%sId="%(dtype)
    
    # Resolve the name if necessary (i.e., if it's not a HIP number). If the 
    # star has no HIP number, log an error and return None
    try:
        hipnr = int(ID)
    except ValueError:
        info = sesame.search(ID,db='S')
        IDs = [alias for alias in info['alias'] if 'HIP' in alias]
        if len(IDs)!=1:
            logger.error("Data retrieval for %s not possible. Reason: no HIP number resolved" % (ID))
            return
        hipnr = IDs[0].split(' ')[1]
    
    # Connect to the website and retrieve the wanted webpage
    
    conn = httplib.HTTPConnection(server)
    conn.request("GET", webpage + str(hipnr))
    response = conn.getresponse()
    if response.reason != "OK":
        logger.error("Data retrieval for HIP%s not possible. Reason: %s" % (str(hipnr), response.reason))
        return
    else:
        logger.info("Data retrieval for HIP%s: OK" % str(hipnr))
        
    contents = response.read()
    conn.close()
    
    # Parse the webpage, to remove the html codes (line starts with <").
    # Put a "#" in front of the header information, and format nicely.
    # Write to the output file if asked for.
    
    data = []
    header = {}
    
    if outputFileName:
        outputFile = open(outputFileName,'w')
    
    for line in contents.split('\n'):
        if line == "": continue
        if not line.startswith("<"):
            line = line.replace("\r", "")
            
            # This is the header
            
            if not line[0].isdigit():
                sline = line.split(':')
                
                # Only keep header entries of the style 
                # "key: value information" in the dictionary
                
                if len(sline)==2:
                    key,info = sline
                    info = info.split()
                    header[key] = (info[0]," ".join(info[1:]))
                if outputFileName:
                    line = "# " + line
                    
            # This is the real contents
            
            else:
                data.append(line.split('|'))
                #-- correct for empty fields
                data[-1] = tuple([(entry.replace(' ','')=='' and np.nan or entry) for entry in data[-1]])
            if outputFileName:
                outputFile.write(line + "\n")
    if outputFileName:
        outputFile.close()
    
    # Make a record array.
    # Choose the header names to be in the VizieR style.
    
    if dtype=='ep':
        dtypes = [('time','f8'),('mag','f8'),('e_mag','f8'),('q_mag','i')]
    elif dtype=='i':
        dtypes = [('orbit','i'),('source','a1'),
                  ('d_acosd','f8'),('d_d','f8'),('d_pi','f8'),
                  ('d_mua','f8'),('d_mud','f8'),
                  ('abs_res','f8'),('abs_std','f8'),('cor','f8')]
    data = np.rec.array(data,dtype=dtypes)
    
    # Fix the time offset
    if dtype=='ep':
        data['time'] += 2440000.0
    
    
    return data,header
Example no. 22
def search(ID=None,time_range=None,prog_ID=None,data_type='cosmicsremoved_log',
           radius=1.,filename=None):
    """
    Retrieve datafiles from the Hermes catalogue.

    B{If C{ID} is given}: A string search is performed to match the 'object'
    field in the FITS headers. The coordinates are pulled from SIMBAD. If the
    star ID is recognised by SIMBAD, an additional search is done based only on
    the coordinates. The union of both searches is the final result.

    B{If C{time_range} is given}: The search is confined within the defined
    range. If you only give one day, the search is confined to the observations
    made during the night starting at that day. If C{ID} is not given, all
    observations will be returned of the given datatype.

    B{If C{prog_ID} is given}: The search is performed to match the number of
    the program. Individual stars are not queried in SIMBAD, so any information
    that is missing in the header will not be corrected.

    If you don't give either ID or time_range, the info on all data will be
    returned. This is a huge amount of data, so it can take a while before it
    is returned. Remember that the header of each spectrum is read in and checked.

    Data type can be any of:
        1. cosmicsremoved_log: return log merged without cosmics
        2. cosmicsremoved_wavelength: return wavelength merged without cosmics
        3. ext_log: return log merged with cosmics
        4. ext_wavelength: return wavelength merged with cosmics
        5. raw: raw files (also TECH..., i.e. any file in the raw directory)

    This function needs a C{HermesFullDataOverview.tsv} file located in one
    of the datadirectories from C{config.py}, and subdirectory C{catalogs/hermes}.

    If this file does not exist, you can create it with L{make_data_overview}.

    If you want a summary file with the data you search for, you can give
    C{filename} as an extra keyword argument. The results will be saved to that
    file.

    The columns in the returned record array are listed in L{make_data_overview},
    but are repeated here (upper-case names are directly retrieved from the
    FITS header, lower-case names are calculated values; the real header
    keywords are all lower case):

        1.  UNSEQ
        2.  PROG_ID
        3.  OBSMODE
        4.  BVCOR
        5.  OBSERVER
        6.  OBJECT
        7.  RA
        8.  DEC
        9.  BJD
        10. EXPTIME
        11. PMTOTAL
        12. DATE-AVG
        13. OBJECT
        14. airmass
        15. filename

    The column C{filename} contains a string with the absolute location of the
    file. If you need any extra information from the header, you can easily
    retrieve it.

    If BVCOR or BJD are not available from the FITS header, this function will
    attempt to calculate it. It will not succeed if the object's name is not
    recognised by SIMBAD.

    Example usage: retrieve all data on HD50230

    >>> mydata = search('HD50230')

    Keep only those with a long enough exposure time:

    >>> myselection = mydata[mydata['exptime']>500]

    Look up the 'telalt' value in the FITS headers of all these files via a fast
    list comprehension:

    >>> telalts = [pf.getheader(fname)['telalt'] for fname in myselection['filename']]

    Search for all data of HD50230 taken in the night of 22 September 2009:

    >>> data = search('HD50230',time_range='2009-9-22')

    Or within an interval of a few days:

    >>> data = search('HD50230',time_range=('2009-9-22','2009-9-30'))

    Search for all data observed in a given night:

    >>> data = search(time_range='2009-9-22')

    B{Warning:} the heliocentric correction is not calculated when no ID is given,
    so make sure it is present in the header if you need it, or calculate it yourself.

    @param ID: ID of the star, understandable by SIMBAD
    @type ID: str
    @param time_range: range of dates to confine the search to
    @type time_range: tuple of strings of the form '2009-09-23T04:24:35.712556' or '2009-09-23'
    @param data_type: if None, all data will be returned. Otherwise, subset
    'cosmicsremoved', 'merged' or 'raw'
    @type data_type: str
    @param radius: search radius around the coordinates (arcminutes)
    @type radius: float
    @param filename: write summary to outputfile if not None
    @type filename: str
    @return: record array with summary information on the observations, as well
    as their location (column 'filename')
    @rtype: numpy rec array
    """
    #-- read in the data from the overview file, and get SIMBAD information
    #   of the star
    ctlFile = '/STER/mercator/hermes/HermesFullDataOverview.tsv'
    data = ascii.read2recarray(ctlFile, splitchar='\t')
    #data = ascii.read2recarray(config.get_datafile(os.path.join('catalogs','hermes'),'HermesFullDataOverview.tsv'),splitchar='\t')
    keep = np.array(np.ones(len(data)),bool)
    #-- confined search within given time range
    if time_range is not None:
        if isinstance(time_range,str):
            time_range = _timestamp2datetime(time_range)
            time_range = (time_range,time_range+datetime.timedelta(days=1))
        else:
            time_range = (_timestamp2datetime(time_range[0]),_timestamp2datetime(time_range[1]))
        keep = keep & np.array([(time_range[0]<=_timestamp2datetime(i)<=time_range[1]) for i in data['date-avg']],bool)
        info = None


    #-- search on ID
    if ID is not None:
        info = sesame.search(ID)

        #-- first search on object name only
        ID = ID.replace(' ','').replace('.','').replace('+','').replace('-','').replace('*','')
        match_names = np.array([objectn.replace(' ','').replace('.','').replace('+','').replace('-','').replace('*','') for objectn in data['object']],str)
        keep_id = [bool(((ID in objectn) or (objectn in ID)) and len(objectn)) for objectn in match_names]
        keep_id = np.array(keep_id)
        #   if we found the star on SIMBAD, we use its RA and DEC to match the star
        if info:
            ra,dec = info['jradeg'],info['jdedeg']
            keep_id = keep_id | (np.sqrt((data['ra']-ra)**2 + (data['dec']-dec)**2) < radius/60.)
        keep = keep & keep_id

    if prog_ID is not None:
        keep = keep & (data['prog_id']==prog_ID)

    #-- if some data is found, we check if the C{data_type} string is contained
    #   in the file's name. If not, we remove it.
    if np.any(keep):
        data = data[keep]

        if data_type is not None:
            data_type = data_type.lower()
            #-- now derive the location of the 'data_type' types from the raw
            #   files
            if not data_type=='raw':
                data['filename'] = [_derive_filelocation_from_raw(ff,data_type) for ff in data['filename']]
                existing_files = np.array([ff!='naf' for ff in data['filename']],bool)
                data = data[existing_files]
        seqs = sorted(set(data['unseq']))
        logger.info('ID={}/prog_ID={}: Found {:d} spectra (data type={} with unique unseqs)'.format(ID,prog_ID,len(seqs),data_type))
    else:
        data = data[:0]
        logger.info('%s: Found no spectra'%(ID))

    #-- we now check if the barycentric correction was calculated properly.
    #   If not, we calculate it here, but only if the object was found in
    #   SIMBAD. Else, we have no information on the ra and dec (if bvcorr was
    #   not calculated, ra and dec are not in the header).
    for obs in data:
        if ID is not None and info:
            try:
                jd  = _timestamp2jd(obs['date-avg'])
            except ValueError:
                logger.info('Header probably corrupted for unseq {}: no info on time or barycentric correction'.format(obs['unseq']))
                jd = np.nan
            # the previous line is equivalent to:
            # day = dateutil.parser.parse(header['DATE-AVG'])
            # BJD = ephem.julian_date(day)
            bvcorr, hjd = helcorr(ra/360.*24, dec, jd)
        else:
            break
        if np.isnan(obs['bvcor']):
            logger.info("Corrected 'bvcor' for unseq {} (missing in header)".format(obs['unseq']))
            obs['bvcor'] = float(bvcorr)
        if np.isnan(obs['bjd']):
            logger.info("Corrected 'bjd' for unseq {} (missing in header)".format(obs['unseq']))
            obs['bjd'] = float(hjd)


    #-- do we need the information as a file, or as a numpy array?
    if filename is not None:
        ascii.write_array(data,filename,auto_width=True,header=True)
    else:
        return data