Esempio n. 1
0
def main():
    """Command-line entry point: report per-day log files that are missing.

    Walks every date from --start-date to --end-date and stats the file
    named by strftime'ing --log-format, printing the names that are absent.
    """
    from optparse import OptionParser
    # MagicDateOption adds the 'magicdate' option type, which parses
    # natural-language dates (e.g. 'yesterday', 'last monday').
    parser = OptionParser(option_class=magicdate.MagicDateOption,
                          usage='%prog [options]',
                          version='%prog ' + __version__)
    parser.add_option('-s',
                      '--start-date',
                      type='magicdate',
                      default=None,
                      help='Force a start time (magicdate)')
    parser.add_option(
        '-e',
        '--end-date',
        type='magicdate',
        default=magicdate.magicdate('today'),
        help='Force an end  time (magicdate) [default: %default]')
    parser.add_option(
        '-l',
        '--log-format',
        default='%Y%m%d.log',
        help=
        'datetime strftime format string to create the log file names to search for [default: %default]'
    )
    (options, args) = parser.parse_args()

    # NOTE(review): assert is stripped under -O; parser.error('--start-date
    # is required') would be a sturdier way to enforce this.
    assert options.start_date is not None
    for d in date_generator(options.start_date, options.end_date):
        #print d.strftime(options.log_format)
        filename = d.strftime(options.log_format)
        try:
            s = os.stat(filename)
        except OSError:
            # stat failed -> the expected log file for this day is missing.
            print 'missing', filename
Esempio n. 2
0
def reports_for_date(date_string):
    """Return a JSON response of earnings reports for *date_string*.

    *date_string* may be any natural-language date accepted by magicdate
    (e.g. 'today', '2015-01-02').  On any parse or lookup failure a JSON
    body with a null payload is returned instead of propagating the error.
    """
    try:
        date_object = magicdate(date_string)
        output_list = parser.earnings_reports_on_date(date_object)
        return jsonify(earnings_reports=output_list)
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        print('Could not parse date')
        # NOTE(review): key differs from the success branch
        # ('earning_events' vs 'earnings_reports') -- confirm whether
        # clients rely on this before unifying.
        return jsonify(earning_events=None)
Esempio n. 3
0
    def do_once(self):
        cx = self.cx
        cu = self.cu
        v = self.verbose

        tzoffset = datetime.timedelta(
            seconds=time.timezone)  # Always work in utc.  Magicdate is local

        if self.track_start is not None:
            track_start = mark_utc(
                magicdate.magicdate(self.track_start) + tzoffset)
            if v:
                cu.execute(
                    'SELECT COUNT(*) FROM track_lines WHERE update_timestamp < %s;',
                    (track_start, ))
                print 'Deleting from track_lines:', cu.fetchone()[0]

            cu.execute('DELETE FROM track_lines WHERE update_timestamp < %s;',
                       (track_start, ))

            # if the points are too old to be in a track_line, then delete them
            if v:
                cu.execute(
                    'SELECT COUNT(*) FROM position WHERE cg_timestamp < %s;',
                    (track_start, ))
                print 'Deleting from position:', cu.fetchone()[0]

            cu.execute('DELETE FROM position WHERE cg_timestamp < %s;',
                       (track_start, ))

        if self.last_position_start is not None:
            last_position_start = mark_utc(
                magicdate.magicdate(self.last_position_start) + tzoffset)

            if v:
                cu.execute(
                    'SELECT COUNT(*) FROM last_position WHERE cg_timestamp < %s;',
                    (track_start, ))
                print 'Deleting from last_position:', cu.fetchone()[0]

            cu.execute('DELETE FROM last_position WHERE cg_timestamp < %s;',
                       (last_position_start, ))

        if self.last_position_start is not None or self.track_start is not None:
            cx.commit()
Esempio n. 4
0
def get_container_by_channel(slug,
                             number=10,
                             depth=1,
                             include_children=True,
                             **kwargs):
    """Return up to *number* published Containers for a channel.

    slug -- long_slug of the base channel.
    depth -- how many levels of child channels to include.
    include_children -- also match containers from descendant channels.
    Extra queryset filters pass through **kwargs; a 'magic_date' kwarg is
    parsed with magicdate() and used as the publication cut-off.
    Returns a queryset slice ordered by -date_available, or None if the
    final query fails.
    """
    box = None
    magic_date = kwargs.pop('magic_date', False)
    date = timezone.now()
    if magic_date:
        try:
            date = magicdate(magic_date)
        except Exception:
            # Unparseable magic_date: fall back to "now".
            pass

    if include_children:
        try:
            # The child-slug list is expensive to build; cache it per slug.
            kwargs['channel_long_slug__in'] = cache.get(
                'get_container_by_channel-{}'.format(slug))
            if not kwargs['channel_long_slug__in']:
                base_channel = Channel.objects.get(long_slug=slug)
                kwargs['channel_long_slug__in'] = [base_channel.long_slug]

                def _append_recursivelly(channel, current_level=0):
                    # Depth test
                    if current_level >= depth:
                        return
                    elif current_level < depth:
                        current_level += 1

                    for children in channel.get_children():
                        kwargs['channel_long_slug__in'].append(
                            children.long_slug)
                        # Recursion
                        _channel = Channel.objects.get(
                            long_slug=children.long_slug)
                        _append_recursivelly(_channel, current_level)

                _append_recursivelly(base_channel)
                cache.set('get_container_by_channel-{}'.format(slug),
                          kwargs['channel_long_slug__in'],
                          settings.OPPS_CACHE_EXPIRE)
        except Channel.DoesNotExist:
            kwargs['channel_long_slug__in'] = []
    try:
        kwargs['site'] = settings.SITE_ID
        if settings.OPPS_CONTAINERS_SITE_ID:
            kwargs['site'] = settings.OPPS_CONTAINERS_SITE_ID
        kwargs['show_on_root_channel'] = include_children
        kwargs['date_available__lte'] = date
        kwargs['published'] = True
        box = Container.objects.distinct().filter(
            **kwargs).order_by('-date_available')[:number]
    except Exception:
        # Narrowed from a bare 'except:'; query errors still yield None
        # (best-effort contract preserved) but system-exiting exceptions
        # now propagate.
        pass
    return box
Esempio n. 5
0
 def do_process(self, input_value):
     """Normalize *input_value* to a 1-tuple containing an int (a year).

     Accepts an int-like value directly, or a natural-language date that
     magicdate can parse (the year is extracted).  A tuple input is first
     reduced to its first element.  Returns None when the value cannot be
     interpreted either way.
     """
     if isinstance(input_value, tuple):
         input_value = input_value[0]
     try:
         return (int(input_value),)
     except ValueError:
         try:
             # Renamed from 'datetime' to avoid shadowing the stdlib module.
             parsed = magicdate.magicdate(input_value)
             return (parsed.year,)
         except Exception:
             # Narrowed from a bare 'except:'.
             return None
Esempio n. 6
0
 def do_process(self, input_value):
     """Normalize *input_value* to a 1-tuple containing an int (a year).

     Accepts an int-like value directly, or a natural-language date that
     magicdate can parse (the year is extracted).  A tuple input is first
     reduced to its first element.  Returns None when the value cannot be
     interpreted either way.
     """
     if isinstance(input_value, tuple):
         input_value = input_value[0]
     try:
         return (int(input_value), )
     except ValueError:
         try:
             # Renamed from 'datetime' to avoid shadowing the stdlib module.
             parsed = magicdate.magicdate(input_value)
             return (parsed.year, )
         except Exception:
             # Narrowed from a bare 'except:'.
             return None
Esempio n. 7
0
def get_container_by_channel(slug, number=10, depth=1,
                             include_children=True, **kwargs):
    """Return up to *number* published Containers for a channel.

    slug -- long_slug of the base channel.
    depth -- include descendant channels down to this many extra levels.
    include_children -- also match containers from descendant channels.
    A 'magic_date' kwarg is parsed with magicdate() as the publication
    cut-off; any '*__in' kwarg passed as a comma string is split to a list.
    Returns a queryset slice ordered by -date_available, or None if the
    final query fails.
    """
    box = None
    magic_date = kwargs.pop('magic_date', False)
    date = timezone.now()

    if magic_date:
        try:
            date = magicdate(magic_date)
        except Exception:
            # Unparseable magic_date: fall back to "now".
            pass

    # __in split treatment
    splited = dict([
        (key, value.split(','))
        for key, value
        in kwargs.items()
        if key.endswith('__in') and type(value) is not list])
    kwargs.update(splited)

    if include_children:
        k = 'channel_id__in'
        # The descendant-id list is expensive to compute; cache it per slug.
        kwargs[k] = cache.get(
            'get_container_by_channel-{0}'.format(slug))
        if not kwargs[k]:

            try:
                channel = Channel.objects.get(long_slug=slug)
                qs = channel.get_descendants(include_self=True)
                qs = qs.filter(level__lte=channel.level + depth)
                kwargs[k] = \
                    qs.values_list("id", flat=True)
                cache.set(
                    'get_container_by_channel-{0}'.format(slug),
                    kwargs[k],
                    settings.OPPS_CACHE_EXPIRE)

            except Channel.DoesNotExist:
                kwargs[k] = []

    try:
        kwargs['site'] = settings.SITE_ID
        if settings.OPPS_CONTAINERS_SITE_ID:
            kwargs['site'] = settings.OPPS_CONTAINERS_SITE_ID
        kwargs['show_on_root_channel'] = include_children
        kwargs['date_available__lte'] = date
        kwargs['published'] = True
        box = Container.objects.distinct().filter(
            **kwargs).order_by('-date_available')[:number]
    except Exception:
        # Narrowed from a bare 'except:'; query errors still yield None but
        # system-exiting exceptions now propagate.
        pass
    return box
Esempio n. 8
0
def get_container_by_channel(slug, number=10, depth=1,
                             include_children=True, **kwargs):
    """Return up to *number* published Containers for a channel.

    slug -- long_slug of the base channel.
    depth -- how many levels of child channels to include.
    include_children -- also match containers from descendant channels.
    Extra queryset filters pass through **kwargs; a 'magic_date' kwarg is
    parsed with magicdate() and used as the publication cut-off.
    Returns a queryset slice ordered by -date_available, or None if the
    final query fails.
    """
    box = None
    magic_date = kwargs.pop('magic_date', False)
    date = timezone.now()
    if magic_date:
        try:
            date = magicdate(magic_date)
        except Exception:
            # Unparseable magic_date: fall back to "now".
            pass

    if include_children:
        try:
            # The child-slug list is expensive to build; cache it per slug.
            kwargs['channel_long_slug__in'] = cache.get(
                'get_container_by_channel-{}'.format(slug))
            if not kwargs['channel_long_slug__in']:
                base_channel = Channel.objects.get(long_slug=slug)
                kwargs['channel_long_slug__in'] = [base_channel.long_slug]

                def _append_recursivelly(channel, current_level=0):
                    # Depth test
                    if current_level >= depth:
                        return
                    elif current_level < depth:
                        current_level += 1

                    for children in channel.get_children():
                        kwargs['channel_long_slug__in'].append(
                            children.long_slug)
                        # Recursion
                        _channel = Channel.objects.get(
                            long_slug=children.long_slug)
                        _append_recursivelly(_channel, current_level)

                _append_recursivelly(base_channel)
                cache.set('get_container_by_channel-{}'.format(slug),
                          kwargs['channel_long_slug__in'],
                          settings.OPPS_CACHE_EXPIRE)
        except Channel.DoesNotExist:
            kwargs['channel_long_slug__in'] = []
    try:
        kwargs['site'] = settings.SITE_ID
        if settings.OPPS_CONTAINERS_SITE_ID:
            kwargs['site'] = settings.OPPS_CONTAINERS_SITE_ID
        kwargs['show_on_root_channel'] = include_children
        kwargs['date_available__lte'] = date
        kwargs['published'] = True
        box = Container.objects.distinct().filter(
            **kwargs).order_by('-date_available')[:number]
    except Exception:
        # Narrowed from a bare 'except:'; query errors still yield None but
        # system-exiting exceptions now propagate.
        pass
    return box
Esempio n. 9
0
def guess(s, parse=True, is_gmt=False, set_gmt=False,
             try_iso=True, try_num=True, try_en=True):
    """Guess the format, and optionally parse, the input string.

    If 'is_gmt' is True, assume timezone is GMT when not given.
    Otherwise, assume localtime.
    If 'set_gmt' is True then set the timezone to GMT, otherwise
    set it to localtime.

    The answer is a pair containing the guessed format and, if the 'parse'
    flag was given, the parsed value as seconds since the epoch, otherwise None.

    The format is a constant defined in this module:
      UNKNOWN - Cannot guess the format (associated value is None)
      ISO8601 - This is a prefix of the ISO8601 format accepted by completeISO()
      ENGLISH - This is an natural English-language format accepted by makeISO()
      SECONDS - This is seconds since the UNIX epoch (Midnight on 1970/1/1).
    """
    if not(s):
        return UNKNOWN, None
    sec = None
    s = s.strip()
    # try ISO8601
    if try_iso:
        m = ISO_DATE_PARTS.match(s)
        if m and m.start() == 0 and m.end() == len(s):
            if parse:
                if s[-1] == 'Z':
                    # explicit timezone overrides option
                    is_gmt = True
                iso_s = completeISO(s, is_gmt=is_gmt, set_gmt=set_gmt)
                sec = parseISO(iso_s)
            return ISO8601, sec
    # try number
    if try_num:
        m = NUMBER_DATE.match(s)
        if m and m.start() == 0 and m.end() == len(s):
            if parse:
                sec = float(s)
            return SECONDS, sec
    # try natural language
    if try_en:
        try:
            d = magicdate.magicdate(s)
        except Exception:
            # py2/py3-compatible clause; the exception object was unused.
            d = None
        if d is not None:
            if parse:
                partial_iso = d.isoformat()
                iso = completeISO(partial_iso, is_gmt=False, set_gmt=set_gmt)
                sec = parseISO(iso)
            return ENGLISH, sec
    # BUG FIX: previously fell off the end returning bare None; callers
    # unpack a (format, value) pair, so return the documented UNKNOWN pair.
    return UNKNOWN, None
Esempio n. 10
0
def get_container_by_channel(slug,
                             number=10,
                             depth=1,
                             include_children=True,
                             **kwargs):
    """Return up to *number* published Containers for a channel.

    slug -- long_slug of the base channel.
    depth -- include descendant channels down to this many extra levels.
    include_children -- also match containers from descendant channels.
    A 'magic_date' kwarg is parsed with magicdate() as the publication
    cut-off; any '*__in' kwarg passed as a comma string is split to a list.
    Returns a queryset slice ordered by -date_available, or None if the
    final query fails.
    """
    box = None
    magic_date = kwargs.pop('magic_date', False)
    date = timezone.now()

    if magic_date:
        try:
            date = magicdate(magic_date)
        except Exception:
            # Unparseable magic_date: fall back to "now".
            pass

    # __in split treatment
    splited = dict([(key, value.split(',')) for key, value in kwargs.items()
                    if key.endswith('__in') and type(value) is not list])
    kwargs.update(splited)

    if include_children:
        k = 'channel_id__in'
        # The descendant-id list is expensive to compute; cache it per slug.
        kwargs[k] = cache.get('get_container_by_channel-{0}'.format(slug))
        if not kwargs[k]:

            try:
                channel = Channel.objects.get(long_slug=slug)
                qs = channel.get_descendants(include_self=True)
                qs = qs.filter(level__lte=channel.level + depth)
                kwargs[k] = \
                    qs.values_list("id", flat=True)
                cache.set('get_container_by_channel-{0}'.format(slug),
                          kwargs[k], settings.OPPS_CACHE_EXPIRE)

            except Channel.DoesNotExist:
                kwargs[k] = []

    try:
        kwargs['site'] = settings.SITE_ID
        if settings.OPPS_CONTAINERS_SITE_ID:
            kwargs['site'] = settings.OPPS_CONTAINERS_SITE_ID
        kwargs['show_on_root_channel'] = include_children
        kwargs['date_available__lte'] = date
        kwargs['published'] = True
        box = Container.objects.distinct().filter(
            **kwargs).order_by('-date_available')[:number]
    except Exception:
        # Narrowed from a bare 'except:'; query errors still yield None but
        # system-exiting exceptions now propagate.
        pass
    return box
Esempio n. 11
0
    def do_once(self):
        cx = self.cx
        cu = self.cu
        v = self.verbose

        tzoffset = datetime.timedelta(seconds=time.timezone) # Always work in utc.  Magicdate is local


        if self.track_start is not None:
            track_start = mark_utc(magicdate.magicdate(self.track_start)+tzoffset )
            if v:
                cu.execute('SELECT COUNT(*) FROM track_lines WHERE update_timestamp < %s;', (track_start,))
                print 'Deleting from track_lines:',cu.fetchone()[0]

            cu.execute('DELETE FROM track_lines WHERE update_timestamp < %s;', (track_start,))

            # if the points are too old to be in a track_line, then delete them
            if v:
                cu.execute('SELECT COUNT(*) FROM position WHERE cg_timestamp < %s;', (track_start,))
                print 'Deleting from position:',cu.fetchone()[0]

            cu.execute('DELETE FROM position WHERE cg_timestamp < %s;', (track_start,))



        if self.last_position_start is not None:
            last_position_start = mark_utc(magicdate.magicdate(self.last_position_start)+tzoffset )

            if v:
                cu.execute('SELECT COUNT(*) FROM last_position WHERE cg_timestamp < %s;', (track_start,))
                print 'Deleting from last_position:',cu.fetchone()[0]

            cu.execute('DELETE FROM last_position WHERE cg_timestamp < %s;', (last_position_start,))

        if self.last_position_start is not None or self.track_start is not None:
            cx.commit()
Esempio n. 12
0
def main():
    """Command-line entry point: report per-day log files that are missing.

    Walks each date from --start-date to --end-date and stats the file
    named by strftime'ing --log-format, printing the names that do not exist.
    """
    from optparse import OptionParser
    # MagicDateOption adds the 'magicdate' option type, which parses
    # natural-language dates (e.g. 'yesterday', 'last monday').
    parser = OptionParser(option_class=magicdate.MagicDateOption,
                          usage='%prog [options]',
                          version='%prog '+__version__)
    parser.add_option('-s', '--start-date', type='magicdate', default=None, help='Force a start time (magicdate)')
    parser.add_option('-e', '--end-date',   type='magicdate', default=magicdate.magicdate('today'), help='Force an end  time (magicdate) [default: %default]')
    parser.add_option('-l', '--log-format', default='%Y%m%d.log', help='datetime strftime format string to create the log file names to search for [default: %default]')
    (options,args) = parser.parse_args()

    # NOTE(review): assert is stripped under -O; parser.error() would be a
    # sturdier way to require --start-date.
    assert options.start_date is not None
    for d in date_generator(options.start_date, options.end_date):
        #print d.strftime(options.log_format)
        filename = d.strftime(options.log_format)
        try:
            s = os.stat(filename)
        except OSError:
            # stat failed -> the expected log file for this day is missing.
            print 'missing',filename
Esempio n. 13
0
def makeISO(value, is_gmt=False, set_gmt=False):
    """If value is a tuple, assume it is the one returned by time.gmtime() or time.localtime()
    Otherwise, assume value is an English language description (for partial ISO
    strings, use completeISO() instead).

    Return an ISO8601 string, with timezone set to GMT or localtime.

    Raises ValueError when a string value cannot be parsed by magicdate.
    """
    tz_str = 'Z' # assume GMT
    if isinstance(value, tuple) or isinstance(value, list):
        # Format only the first six fields (Y, M, D, h, m, s) of the time
        # tuple; zip stops at the shorter sequence.
        fmt = ("%04d", "-%02d", "-%02d", "T%02d", ":%02d", ":%02d")
        s = ''.join([f % v for f, v in zip(fmt, value)])
        # BUG FIX: this previously tested the undefined name 'gmt'
        # (NameError on every tuple/list input); the parameter is is_gmt.
        if not is_gmt:
            tz_str = getLocaltimeISO(value)
        iso = s + tz_str
    else:
        try:
            d = magicdate.magicdate(value)
        except Exception:
            raise ValueError("magicdate cannot parse '%s'" % value)
        partial_iso = d.isoformat()
        iso = completeISO(partial_iso, is_gmt=is_gmt, set_gmt=set_gmt)
    # BUG FIX: the computed string was never returned, despite the
    # docstring's contract.
    return iso
Esempio n. 14
0
from boto.s3.key import Key
import magicdate
import gzip
import json
import glob
import logging
from filechunkio import FileChunkIO
import math
import shutil
from pprint import pprint
import subprocess

print "Daily file tody up. Pre S3 push"

# we are bundling the previous days data.
yesterday = magicdate.magicdate('yesterday')

dir_file = gzip.open("/home/TfL_feeds/directory_data.json.gz")
dir_json = json.load(dir_file)
os.chdir(dir_json['home_directory'] + "/data/")

# Define folder name within local directory
folder_name = str(yesterday)

message = "Tarring and compressing " + str(folder_name)
print message

folder_exists = os.path.isdir(folder_name)
folder = (folder_name + ".tar.gz")

print "Working on, ", str(folder)
Esempio n. 15
0
 def __setitem__(self, key, value) :
     """Store *value* under *key*, coercing the 'Date' entry via magicdate."""
     if key == 'Date' :
         self._dict.__setitem__(key, magicdate(value))
     else :
         self._dict.__setitem__(key, value)
Esempio n. 16
0
def bag2kmlbbox(in_name,
                out_file,
                title=None,
                kml_complete=False,
                verbose=False,
                placemark=False):
    v = verbose
    f = h5py.File(in_name)  #'H11302_OLS_OSS/H11302_2m_1.bag')
    #o = file(out_name,'w')
    # FIX: if out_file is a string, then open
    o = out_file
    #print f.listobjects()
    #print f.listitems()

    bag_root = f['/BAG_root']
    metadata_xml = ''.join(bag_root['metadata'])
    #o = file('metadata.xml','w')
    #o.write(metadata_xml)
    #del o

    #root = etree.parse(StringIO(metadata_xml)).getroot()
    #root = etree.parse(StringIO(metadata_xml.replace('smXML:',''))).getroot()
    root = etree.XML(metadata_xml.replace('smXML:', ''))

    xmin = float(root.xpath('//*/westBoundLongitude')[0].text)
    xmax = float(root.xpath('//*/eastBoundLongitude')[0].text)

    ymin = float(root.xpath('//*/southBoundLatitude')[0].text)
    ymax = float(root.xpath('//*/northBoundLatitude')[0].text)

    # WARNING: This date does not relate to the dates the survey was collected!
    date = root.xpath('//*/CI_Date/date')[0].text
    abstract = root.xpath('//*/abstract')[0].text

    timestamp = ''  # No timestamp if we can't handle it
    try:
        import datetime, magicdate
        adate = magicdate.magicdate(date)
        timestamp = '<TimeStamp>' + adate.strftime(
            iso8601_timeformat) + '</TimeStamp>'
    except:
        print 'WARNING: Unable to handle timestamp:', date

    if v:
        print xmin, xmax, '->', ymin, ymax
        print 'date:', date
        print 'abstract:', abstract

    #import subprocess
    #p = subprocess.Popen(
    #    ['source-highlight','-s', 'xml', '--out-format=html'],
    #    stdin=subprocess.PIPE,
    #    stdout=subprocess.PIPE
    #    )
    #metadata_html = p.communicate(input=etree.tostring(root, pretty_print=True ) ) [0]

    metadata_html = etree.tostring(root, pretty_print=True).replace(
        '</', ' ').replace('<', ' ').replace('>',
                                             ' ')  #.replace('\n','<br/>\n')

    if v: print metadata_html

    if not title:
        title = '%s : %s' % (abstract, date)

    kml_data = {
        'title': title,
        'x': (xmin + xmax) / 2.,
        'y': (ymin + ymax) / 2.,
        'xmin': xmin,
        'xmax': xmax,
        'ymin': ymin,
        'ymax': ymax,
        'metadata': metadata_html,
        'timestamp': timestamp,
    }

    #o = file('out.kml','w')
    if kml_complete:
        o.write('''<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2" xmlns:kml="http://www.opengis.net/kml/2.2" xmlns:atom="http://www.w3.org/2005/Atom">
<Document>''')

    if placemark:
        o.write('''
	<Placemark>
		<name>{title}</name>
		{timestamp}
		<description><![CDATA[
<pre>
{metadata}
</pre>
]]>
		</description>
		<Point> <coordinates> {x},{y},0 </coordinates> </Point>
	</Placemark>
''')
    o.write('''
	<Placemark>
		<name>{title}</name>
		{timestamp}
		<LineString>
			<coordinates>
{xmin},{ymin},0
{xmax},{ymin},0
{xmax},{ymax},0
{xmin},{ymax},0
{xmin},{ymin},0
			</coordinates>
		</LineString>
	</Placemark>
'''.format(**kml_data))

    if kml_complete:
        o.write('''</Document>
</kml>
''')

    return
Esempio n. 17
0
                    # Invalid record, skip it.
                    continue
                else:
                    output_list.append(tmp_dict)
            except Exception, e:
                continue
    return output_list


def earnings_report_date_for_symbol(symbol):
    """Scrape Yahoo Finance for *symbol*'s earnings-calendar date.

    Returns the date as a string (normalized through magicdate), or None
    when the page cannot be fetched.  NOTE(review): the element lookups
    below are unguarded, so a page-layout change raises IndexError here.
    """

    url_string = 'http://biz.yahoo.com/rr/?s=' + symbol + '&d=research%2Fearncal'

    print url_string

    try:
        page = urllib2.urlopen(url_string)
        soup = BeautifulSoup(page, 'html.parser')
    except Exception, e:
        # Network/parse failure: report and signal "no date found".
        print e
        return None

    # The calendar date lives in the first <font face="arial" size="+1"> tag.
    date_element = soup.find_all("font", attrs={'face': 'arial', 'size': '+1'})[0]
    date_text = date_element.find('b').string.replace('\n', ' ')

    sanitized_date_text = date_text.replace('US Earnings Calendar for ', '')

    # magicdate parses the remaining natural-language date text.
    output_date = str(magicdate(sanitized_date_text))

    return output_date
Esempio n. 18
0
def add_bag_to_db(cx, infile_name, survey, filename_base, verbose=False, write_xml=False):
    """Parse a BAG (HDF5) file's embedded ISO metadata and insert a row
    into the 'bag' table of database connection *cx*.

    cx -- DB connection with execute()/commit().
    infile_name -- path to the .bag file.
    survey -- survey identifier, used to build the DR/BAG download URLs.
    filename_base -- file name without .bag extension or path.
    write_xml -- currently unused in this body.

    CAUTION: the INSERT at the bottom binds named parameters from locals(),
    so the local variable names in this function ARE the SQL parameter
    names -- do not rename them.
    """
    # filename_base - without .bag or path
    #print ('file:',infile_name, file=sys.stderr)

    v = verbose
    #if v:
    #    print ('opening:',infile_name,os.path.getsize(infile_name))
    f = h5py.File(infile_name) #'H11302_OLS_OSS/H11302_2m_1.bag')
    #o = file('foo.out','w')

    # The metadata is an ISO XML blob stored inside the HDF5 file; strip
    # the smXML: prefix so plain xpath queries work.
    bag_root = f['/BAG_root']
    metadata_xml = ''.join(bag_root['metadata'])
    #o = file('metadata.xml','w')
    #o.write(metadata_xml)
    #del o

    #root = etree.parse(StringIO(metadata_xml)).getroot()
    #root = etree.parse(StringIO(metadata_xml.replace('smXML:',''))).getroot()
    try:
        root = etree.XML(metadata_xml.replace('smXML:',''))
    except:
        print ('bad_metadata:',infile_name) # What can we do?
        return # ouch... better if we could try to fix it somehow
        

    x_min_metadata = float(root.xpath('//*/westBoundLongitude')[0].text)
    x_max_metadata = float(root.xpath('//*/eastBoundLongitude')[0].text)

    y_min_metadata = float(root.xpath('//*/southBoundLatitude')[0].text)
    y_max_metadata = float(root.xpath('//*/northBoundLatitude')[0].text)

    software = root.xpath('//*/BAG_ProcessStep/description')[0].text
    #print ('software:',software)

    utm_zone = int(root.xpath('//*/zone')[0].text)
    # The WGS84 geographic is often foulded up.

    # Re-derive the geographic bounds from the UTM corner coordinates,
    # since the stated WGS84 box is unreliable (see note above).
    utm_coords = root.xpath('//*/gml:coordinates', namespaces={'gml':"http://www.opengis.net/gml"})[0].text
    #print ('\t',utm_coords)
    utm_coords = utm_coords.split()
    utm_x_min,utm_y_min = [float(coord) for coord in utm_coords[0].split(',')]
    utm_x_max,utm_y_max = [float(coord) for coord in utm_coords[1].split(',')]

    params = {'proj':'utm', 'zone':utm_zone}
    proj = Proj(params)

    x_min,y_min = proj(utm_x_min,utm_y_min, inverse=True)
    x_max,y_max = proj(utm_x_max,utm_y_max, inverse=True)
    #print ('\t',utm_x_min,utm_y_min, utm_x_max,utm_y_max)
    #print ('\t\t',x_min,y_min,x_max,y_max)
    #print ('\t\t',x_min_metadata,y_min_metadata,x_max_metadata,y_max_metadata)
    # Report (but do not reject) files whose metadata box disagrees with
    # the UTM-derived box by more than ~0.05 degrees.
    if abs(x_max - x_max_metadata) > 0.05 or abs(y_max - y_max_metadata) > 0.05:
        print ('%s: %.4f %.4f %.4f %.4f' % (filename_base,
            x_min - x_min_metadata,y_min - y_min_metadata,
            x_max - x_max_metadata,y_max - y_max_metadata)
               )

    # Pick a vertical datum: prefer MLLW, otherwise guess the last listed.
    vdatum = None
    datums = [entry.text.strip() for entry in root.xpath('//*/datum/RS_Identifier/code')]
    if len(datums)==0:
        pass
    elif 'MLLW' in datums: vdatum = 'MLLW'
    else:
        vdatum = datums[-1] # just guess that it is the last one
        print('datums:',datums,'->',vdatum,filename_base)
        
    # Grid geometry: rows give dy/height, columns give dx/width.
    axes = (root.xpath('//*/axisDimensionProperties'))
    dx = dy = None
    width = height = None
    for axis in axes:
        #print(etree.tostring(axis, pretty_print=True))
        dim_name = axis.xpath('*/dimensionName')[0].text
        dim_size = int(axis.xpath('*/dimensionSize')[0].text)
        delta = float(axis.xpath('*/*/*/value')[0].text)
        #print ('dim_name: "%s"' % (dim_name,))
        if 'row' == dim_name:
            dy = delta
            height = dim_size
        elif 'column' == dim_name:
            dx = delta
            width = dim_size
        else:
            print ('ERROR: unable to handle dim',dim_name)
            assert False

    # WARNING: This date does not relate to the dates the survey was collected!
    date = root.xpath('//*/CI_Date/date')[0].text 
    abstract = root.xpath('//*/abstract')[0].text
    title = root.xpath('//*/title')[0].text

    timestamp = '' # No timestamp if we can't handle it
    try:
        import datetime, magicdate
        #timestamp = magicdate.magicdate(date)
        creation = magicdate.magicdate(date)
        #timestamp = adate.strftime(iso8601_timeformat) 
    except:
        print ('WARNING: Unable to handle timestamp:',date)
        creation = None

#    if v:
#        print (x_min,x_max,'->',y_min,y_max)
#        print ('date:',date)
#        print ('abstract:',abstract)

    # Crude XML -> plain text: strip the angle-bracket markup for storage.
    metadata_txt = etree.tostring(root, pretty_print=True ).replace('</',' ').replace('<',' ').replace('>',' ') #.replace('\n','<br/>\n')

    # FIX: base url must change based on the number of the survey
    base_url = 'http://surveys.ngdc.noaa.gov/mgg/NOS/coast/H10001-H12000/'
    dr_url = base_url + survey + '/DR/' + survey + '.pdf'
    bag_url = base_url + survey + '/BAG/' + filename_base + '.bag.gz'

    # NOTE(review): 'survey' appears twice in this field list -- confirm
    # against the table schema whether that is intended.
    sql_field_names = ('file', 'survey', 'title','abstract', 'survey', 'creation', 'x_min', 'y_min', 'x_max', 'y_max', 'width', 'height', 'dx', 'dy', 'vdatum', 'utm_zone', 'dr_url', 'bag_url', 'metadata_txt','metadata_xml', 'utm_x_min','utm_y_min' ,  'utm_x_max' ,'utm_y_max', 'software')

    # Bound as the :file SQL parameter via locals() below (shadows builtin).
    file = filename_base

    # check for errors
    #for field in sql_field_names:
    #    print('%s:' % (field,) ,locals()[field])

    sql_insert = 'INSERT INTO bag (' + ','.join(sql_field_names) + ') VALUES (' + ', '.join([':%s' %(field,) for field in sql_field_names ]) + ');'

    #print (bag_data)
    #print (sql_insert)
    cx.execute(sql_insert,locals()) # Passing locals sees crazy
    cx.commit()
Esempio n. 19
0
def bag2kmlbbox(in_name, out_file, title=None, kml_complete=False, verbose=False, placemark=False):
    v = verbose
    f = h5py.File(in_name)  #'H11302_OLS_OSS/H11302_2m_1.bag')
    # o = file(out_name,'w')
    # FIX: if out_file is a string, then open
    o = out_file
    # print f.listobjects()
    # print f.listitems()

    bag_root = f["/BAG_root"]
    metadata_xml = "".join(bag_root["metadata"])
    # o = file('metadata.xml','w')
    # o.write(metadata_xml)
    # del o

    # root = etree.parse(StringIO(metadata_xml)).getroot()
    # root = etree.parse(StringIO(metadata_xml.replace('smXML:',''))).getroot()
    root = etree.XML(metadata_xml.replace("smXML:", ""))

    xmin = float(root.xpath("//*/westBoundLongitude")[0].text)
    xmax = float(root.xpath("//*/eastBoundLongitude")[0].text)

    ymin = float(root.xpath("//*/southBoundLatitude")[0].text)
    ymax = float(root.xpath("//*/northBoundLatitude")[0].text)

    # WARNING: This date does not relate to the dates the survey was collected!
    date = root.xpath("//*/CI_Date/date")[0].text
    abstract = root.xpath("//*/abstract")[0].text

    timestamp = ""  # No timestamp if we can't handle it
    try:
        import datetime, magicdate

        adate = magicdate.magicdate(date)
        timestamp = "<TimeStamp>" + adate.strftime(iso8601_timeformat) + "</TimeStamp>"
    except:
        print "WARNING: Unable to handle timestamp:", date

    if v:
        print xmin, xmax, "->", ymin, ymax
        print "date:", date
        print "abstract:", abstract

    # import subprocess
    # p = subprocess.Popen(
    #    ['source-highlight','-s', 'xml', '--out-format=html'],
    #    stdin=subprocess.PIPE,
    #    stdout=subprocess.PIPE
    #    )
    # metadata_html = p.communicate(input=etree.tostring(root, pretty_print=True ) ) [0]

    metadata_html = (
        etree.tostring(root, pretty_print=True).replace("</", " ").replace("<", " ").replace(">", " ")
    )  # .replace('\n','<br/>\n')

    if v:
        print metadata_html

    if not title:
        title = "%s : %s" % (abstract, date)

    kml_data = {
        "title": title,
        "x": (xmin + xmax) / 2.0,
        "y": (ymin + ymax) / 2.0,
        "xmin": xmin,
        "xmax": xmax,
        "ymin": ymin,
        "ymax": ymax,
        "metadata": metadata_html,
        "timestamp": timestamp,
    }

    # o = file('out.kml','w')
    if kml_complete:
        o.write(
            """<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2" xmlns:kml="http://www.opengis.net/kml/2.2" xmlns:atom="http://www.w3.org/2005/Atom">
<Document>"""
        )

    if placemark:
        o.write(
            """
	<Placemark>
		<name>{title}</name>
		{timestamp}
		<description><![CDATA[
<pre>
{metadata}
</pre>
]]>
		</description>
		<Point> <coordinates> {x},{y},0 </coordinates> </Point>
	</Placemark>
"""
        )
    o.write(
        """
	<Placemark>
		<name>{title}</name>
		{timestamp}
		<LineString>
			<coordinates>
{xmin},{ymin},0
{xmax},{ymin},0
{xmax},{ymax},0
{xmin},{ymax},0
{xmin},{ymin},0
			</coordinates>
		</LineString>
	</Placemark>
""".format(
            **kml_data
        )
    )

    if kml_complete:
        o.write(
            """</Document>
</kml>
"""
        )

    return
Esempio n. 20
0
def add_bag_to_db(cx,
                  infile_name,
                  survey,
                  filename_base,
                  verbose=False,
                  write_xml=False):
    """Extract metadata from one BAG (HDF5) file and INSERT it into the bag table.

    Parameters:
      cx: open sqlite3 connection; one row is inserted and committed.
      infile_name: path of the .bag file to read.
      survey: NOS survey id (e.g. 'H11302'); used to build the DR and BAG URLs.
      filename_base: file name without path or the .bag extension; stored in
        the 'file' column and used in warning messages.
      verbose: reserved for extra diagnostics.
      write_xml: unused; kept for interface compatibility.

    Returns None.  Files with unparseable metadata are skipped with a warning.
    """
    v = verbose

    # Read the embedded ISO metadata XML blob out of the HDF5 container.
    # Open read-only via a context manager: the original leaked the handle
    # and relied on h5py's default mode, which is not guaranteed read-only.
    with h5py.File(infile_name, 'r') as f:
        metadata_xml = ''.join(f['/BAG_root']['metadata'])

    # Many BAG files use the smXML: prefix without declaring the namespace;
    # strip the prefix instead of fighting the parser.
    try:
        root = etree.XML(metadata_xml.replace('smXML:', ''))
    except Exception:  # was a bare except:; keep best-effort skip behavior
        print('bad_metadata:', infile_name)  # What can we do?
        return  # ouch... better if we could try to fix it somehow

    # Geographic bounding box as recorded in the metadata.
    x_min_metadata = float(root.xpath('//*/westBoundLongitude')[0].text)
    x_max_metadata = float(root.xpath('//*/eastBoundLongitude')[0].text)
    y_min_metadata = float(root.xpath('//*/southBoundLatitude')[0].text)
    y_max_metadata = float(root.xpath('//*/northBoundLatitude')[0].text)

    software = root.xpath('//*/BAG_ProcessStep/description')[0].text

    utm_zone = int(root.xpath('//*/zone')[0].text)

    # The WGS84 geographic box is often fouled up, so recompute it from the
    # UTM corner coordinates, which are more reliable.
    utm_coords = root.xpath(
        '//*/gml:coordinates',
        namespaces={'gml': "http://www.opengis.net/gml"})[0].text.split()
    utm_x_min, utm_y_min = [float(c) for c in utm_coords[0].split(',')]
    utm_x_max, utm_y_max = [float(c) for c in utm_coords[1].split(',')]

    proj = Proj({'proj': 'utm', 'zone': utm_zone})
    x_min, y_min = proj(utm_x_min, utm_y_min, inverse=True)
    x_max, y_max = proj(utm_x_max, utm_y_max, inverse=True)

    # Warn when the recomputed box disagrees noticeably with the metadata.
    if abs(x_max - x_max_metadata) > 0.05 or abs(y_max -
                                                 y_max_metadata) > 0.05:
        print('%s: %.4f %.4f %.4f %.4f' %
              (filename_base, x_min - x_min_metadata, y_min - y_min_metadata,
               x_max - x_max_metadata, y_max - y_max_metadata))

    # Pick a vertical datum: prefer MLLW, otherwise guess the last one listed.
    vdatum = None
    datums = [
        entry.text.strip()
        for entry in root.xpath('//*/datum/RS_Identifier/code')
    ]
    if not datums:
        pass
    elif 'MLLW' in datums:
        vdatum = 'MLLW'
    else:
        vdatum = datums[-1]  # just guess that it is the last one
        print('datums:', datums, '->', vdatum, filename_base)

    # Grid geometry: cell size (dx/dy) and raster dimensions (width/height).
    dx = dy = None
    width = height = None
    for axis in root.xpath('//*/axisDimensionProperties'):
        dim_name = axis.xpath('*/dimensionName')[0].text
        dim_size = int(axis.xpath('*/dimensionSize')[0].text)
        delta = float(axis.xpath('*/*/*/value')[0].text)
        if 'row' == dim_name:
            dy = delta
            height = dim_size
        elif 'column' == dim_name:
            dx = delta
            width = dim_size
        else:
            # Unknown axis means the metadata layout changed; fail loudly.
            # (The original printed an error then used `assert False`, which
            # is silently stripped under python -O.)
            raise ValueError('unable to handle dim %r' % (dim_name,))

    # WARNING: This date does not relate to the dates the survey was collected!
    date = root.xpath('//*/CI_Date/date')[0].text
    abstract = root.xpath('//*/abstract')[0].text
    title = root.xpath('//*/title')[0].text

    creation = None  # stays None if the date cannot be parsed
    try:
        import magicdate  # unused `import datetime` dropped
        creation = magicdate.magicdate(date)
    except Exception:  # was a bare except:
        print('WARNING: Unable to handle timestamp:', date)

    # Plain-text copy of the metadata with the markup characters blanked out.
    metadata_txt = etree.tostring(root, pretty_print=True).replace(
        '</', ' ').replace('<', ' ').replace('>', ' ')

    # FIX: base url must change based on the number of the survey
    base_url = 'http://surveys.ngdc.noaa.gov/mgg/NOS/coast/H10001-H12000/'
    dr_url = base_url + survey + '/DR/' + survey + '.pdf'
    bag_url = base_url + survey + '/BAG/' + filename_base + '.bag.gz'

    # Explicit parameter dict instead of the original's locals() trick, which
    # silently broke if any local was renamed and required shadowing the
    # builtin name `file`.  The original field list also named 'survey'
    # twice, which SQLite rejects as a duplicate column in the INSERT.
    row = {
        'file': filename_base,
        'survey': survey,
        'title': title,
        'abstract': abstract,
        'creation': creation,
        'x_min': x_min,
        'y_min': y_min,
        'x_max': x_max,
        'y_max': y_max,
        'width': width,
        'height': height,
        'dx': dx,
        'dy': dy,
        'vdatum': vdatum,
        'utm_zone': utm_zone,
        'dr_url': dr_url,
        'bag_url': bag_url,
        'metadata_txt': metadata_txt,
        'metadata_xml': metadata_xml,
        'utm_x_min': utm_x_min,
        'utm_y_min': utm_y_min,
        'utm_x_max': utm_x_max,
        'utm_y_max': utm_y_max,
        'software': software,
    }
    sql_insert = ('INSERT INTO bag (' + ','.join(row) + ') VALUES (' +
                  ', '.join(':' + field for field in row) + ');')
    cx.execute(sql_insert, row)
    cx.commit()