def get_data():
    """Scrape the launch listing at ``url`` and return one dict per launch.

    Elements with the classes ``datename``, ``missiondata`` and
    ``missdescrip`` are collected from the parsed page and zipped together,
    one triple per launch. (``html`` is presumably ``lxml.html`` -- confirm
    against the file's imports.)

    Only a month and a day are parsed from the page, so the year is inferred
    while walking the entries in page order, starting from the current UTC
    year:

    * a month smaller than the previous entry's month bumps the year by one;
    * otherwise, while still in the current year, a month more than two
      months behind the current month is taken to be next year.

    Returns:
        A list of dicts with the keys ``tm1``, ``tm2``, ``timestr``,
        ``date``, ``stamp``, ``mission``, ``site`` and ``desc``. ``stamp``
        is ``None`` unless month, day and ``tm1`` are all available.
    """
    # NOTE(review): the nesting of this function was reconstructed from a
    # whitespace-mangled source; the position of ``last_month = mon`` and of
    # the final ``else`` should be confirmed against the original file.
    root = html.parse(url).getroot()
    rows = zip(
        root.find_class('datename'),
        root.find_class('missiondata'),
        root.find_class('missdescrip'),
    )

    launches = []
    now = time.gmtime()
    this_year = now.tm_year
    this_month = now.tm_mon
    year = this_year
    last_month = -1

    for name_el, data_el, desc_el in rows:
        datestr = name_el.find_class('launchdate')[0].text.strip()
        mission = name_el.find_class('mission')[0].text.strip()
        desc = desc_el.text_content().strip()

        # The data cell is expected to hold exactly two lines: time, then site.
        timestr, site = data_el.text_content().strip().split('\n')
        site = rm_pref(site)
        timestr = rm_pref(timestr)

        mon, day = parse_date(datestr)
        tm1, tm2 = parse_time(timestr)

        # Year inference only runs for entries with a usable month.
        if mon > 0:
            if mon < last_month:
                year += 1
            elif mon < this_month - 2 and this_year == year:
                year = this_year + 1
            last_month = mon

        stamp = None
        has_full_date = mon > 0 and day > 0
        if has_full_date:
            date = '%02d-%02d-%04d' % (day, mon, year)
            if tm1:
                parsed = time.strptime('%s %s' % (date, tm1),
                                       '%d-%m-%Y %H:%M')
                stamp = get_stamp(parsed)
        else:
            # Fall back to the raw date text with the inferred year appended.
            date = '%s %s' % (datestr, year)

        launches.append({
            'tm1': tm1,
            'tm2': tm2,
            'timestr': timestr,
            'date': date,
            'stamp': stamp,
            'mission': mission,
            'site': site,
            'desc': desc,
        })

    return launches
def _parse_duration_combined(durationstr):
    """Convert a combined-format duration string into a ``timedelta``.

    The string has the form ``P<date>T<time>``. The date and time halves
    are handed to ``parse_date`` and ``parse_time``; the year, month and day
    of the parsed date are folded into a day count using 365 days per year
    and 30 days per month.

    Raises:
        ValueError: if the text after the leading character does not split
            into exactly two parts on ``'T'``.
    """
    #Drop the leading 'P', then separate the date half from the time half
    datestr, timestr = durationstr[1:].split('T')

    ymd = parse_date(datestr)
    hms = parse_time(timestr)

    #Years and months are approximated as 365 and 30 days respectively
    daycount = ymd.year * 365 + ymd.month * 30 + ymd.day

    return datetime.timedelta(
        days=daycount,
        hours=hms.hour,
        minutes=hms.minute,
        seconds=hms.second,
        microseconds=hms.microsecond,
    )
def parse_duration_combined(durationstr):
    """Build a ``timedelta`` from a duration written as ``P<date>T<time>``.

    ``parse_date`` handles the ``<date>`` portion and ``parse_time`` the
    ``<time>`` portion. Calendar units are flattened to days: each year
    counts as 365 days and each month as 30 days.

    Raises:
        ValueError: if stripping the first character and splitting on
            ``'T'`` does not give exactly two pieces.
    """
    #The first character is the 'P' designator and is not part of either half
    halves = durationstr[1:].split('T')
    dateportion, timeportion = halves

    parseddate = parse_date(dateportion)
    parsedtime = parse_time(timeportion)

    #Flatten years/months/days into a single day count
    days = parseddate.year * 365 + parseddate.month * 30 + parseddate.day

    return datetime.timedelta(days=days,
                              hours=parsedtime.hour,
                              minutes=parsedtime.minute,
                              seconds=parsedtime.second,
                              microseconds=parsedtime.microsecond)
def parse_datetime(isodatetimestr, delimiter='T'):
    """Parse a ``<date><delimiter><time>`` string into a ``datetime.datetime``.

    The string is split on ``delimiter`` (the ISO 8601 ``T`` by default);
    the left half goes to ``parse_date`` and the right half to
    ``parse_time``, and the two results are merged with
    ``datetime.datetime.combine``. Any tzinfo on the parsed time is carried
    over by ``combine``.

    Raises:
        ValueError: if splitting on ``delimiter`` does not yield exactly
            two pieces.
    """
    datestr, timestr = isodatetimestr.split(delimiter)

    dateobj = parse_date(datestr)
    timeobj = parse_time(timestr)

    return datetime.datetime.combine(dateobj, timeobj)
def parse_interval(isointervalstr, intervaldelimiter='/', datetimedelimiter='T'):
    """Parse an ISO 8601 interval string into a pair of endpoints.

    Accepted forms::

        <start>/<end>
        <start>/<duration>
        <duration>/<end>

    ``<start>`` and ``<end>`` may be dates or datetimes, never bare times.
    A lone ``<duration>`` is not supported because there is no anchor to
    resolve it against.

    Args:
        isointervalstr: The interval string to parse.
        intervaldelimiter: Separator between the two halves of the interval.
        datetimedelimiter: Separator between date and time inside a
            datetime; its presence in a half is what marks that half as a
            datetime (or a duration as having a time component).

    Returns:
        A 2-tuple. The first item is always the endpoint that was given
        explicitly and the second is the other endpoint, so for
        ``<duration>/<end>`` the result is ``(end, end - duration)`` and is
        NOT in chronological order (use ``sorted`` if that is needed). When
        the explicit endpoint is a date but the duration has a time
        component, the computed endpoint is a ``datetime`` so no resolution
        is lost.

    Raises:
        ValueError: if the string does not split into exactly two halves.
        IndexError: if either half is empty.
    """
    #Import under a private alias so the upconversion below works whether
    #the enclosing module binds 'datetime' to the module or to the class;
    #the rest of the visible code uses it as the module.
    import datetime as _dt

    def _has_time(part):
        #A half carries a time component iff it contains the delimiter
        return datetimedelimiter in part

    def _parse_endpoint(part):
        #Dispatch a <start>/<end> half to the matching parser
        if _has_time(part):
            return parse_datetime(part, delimiter=datetimedelimiter)
        return parse_date(part)

    def _at_midnight(dateobj):
        #Promote a date to a datetime at 00:00 so sub-day durations survive
        return _dt.datetime.combine(dateobj, _dt.datetime.min.time())

    firstpart, secondpart = isointervalstr.split(intervaldelimiter)

    if firstpart[0] == 'P':
        #<duration>/<end>
        duration = parse_duration(firstpart)

        if _has_time(secondpart):
            #<end> is a datetime
            enddatetime = parse_datetime(secondpart,
                                         delimiter=datetimedelimiter)
            return (enddatetime, enddatetime - duration)

        #<end> is just a date
        enddate = parse_date(secondpart)

        if _has_time(firstpart):
            #Duration has a time component, upconvert to keep resolution
            return (enddate, _at_midnight(enddate) - duration)

        return (enddate, enddate - duration)

    if secondpart[0] == 'P':
        #<start>/<duration>
        duration = parse_duration(secondpart)

        if _has_time(firstpart):
            #<start> is a datetime
            startdatetime = parse_datetime(firstpart,
                                           delimiter=datetimedelimiter)
            return (startdatetime, startdatetime + duration)

        #<start> is just a date
        startdate = parse_date(firstpart)

        if _has_time(secondpart):
            #Duration has a time component, upconvert to keep resolution
            return (startdate, _at_midnight(startdate) + duration)

        return (startdate, startdate + duration)

    #<start>/<end>: each half is independently a date or a datetime
    return (_parse_endpoint(firstpart), _parse_endpoint(secondpart))
def parse_interval(isointervalstr, intervaldelimiter='/', datetimedelimiter='T'):
    """Return the two endpoints described by an ISO 8601 interval string.

    Valid formats are::

        <start>/<end>
        <start>/<duration>
        <duration>/<end>

    The ``<start>`` and ``<end>`` values can be dates or datetimes, not
    times. The bare ``<duration>`` format is expressly not supported, as
    there is no way to provide the additional required context.

    Args:
        isointervalstr: The interval string to parse.
        intervaldelimiter: Separator between the two halves of the interval.
        datetimedelimiter: Separator between date and time inside a
            datetime; a half containing it is treated as a datetime, and a
            duration containing it is treated as having a time component.

    Returns:
        A 2-tuple of ``datetime.date`` / ``datetime.datetime`` objects. The
        explicitly given endpoint comes first and the derived one second,
        so ``<duration>/<end>`` yields ``(end, end - duration)``. This keeps
        the result consistent with ``<start>/<end>`` parsing and makes
        repeating-interval code cleaner; callers who want chronological
        order can use ``sorted``.

        If the explicit endpoint is a plain date but the duration has a
        time component, the derived endpoint is returned as a ``datetime``.
        ``date`` +/- ``timedelta`` arithmetic drops the sub-day part of the
        timedelta, so staying with a ``date`` would silently lose it.

    Raises:
        ValueError: if the string does not split into exactly two halves.
        IndexError: if either half is empty.
    """
    #Local aliased import: needed only for the date -> datetime promotion,
    #and independent of how the enclosing module binds the name 'datetime'.
    import datetime as _dt

    firstpart, secondpart = isointervalstr.split(intervaldelimiter)

    firsthastime = datetimedelimiter in firstpart
    secondhastime = datetimedelimiter in secondpart

    if firstpart[0] == 'P':
        #<duration>/<end>
        duration = parse_duration(firstpart)

        if secondhastime:
            #<end> is a datetime
            enddatetime = parse_datetime(secondpart,
                                         delimiter=datetimedelimiter)
            return (enddatetime, enddatetime - duration)

        #<end> must just be a date
        enddate = parse_date(secondpart)

        if firsthastime:
            #Promote to midnight so the duration's time part is not dropped
            endmidnight = _dt.datetime.combine(enddate, _dt.time())
            return (enddate, endmidnight - duration)

        return (enddate, enddate - duration)

    if secondpart[0] == 'P':
        #<start>/<duration>
        duration = parse_duration(secondpart)

        if firsthastime:
            #<start> is a datetime
            startdatetime = parse_datetime(firstpart,
                                           delimiter=datetimedelimiter)
            return (startdatetime, startdatetime + duration)

        #<start> must just be a date
        startdate = parse_date(firstpart)

        if secondhastime:
            #Promote to midnight so the duration's time part is not dropped
            startmidnight = _dt.datetime.combine(startdate, _dt.time())
            return (startdate, startmidnight + duration)

        return (startdate, startdate + duration)

    #<start>/<end>: parse each half as a datetime or a date as appropriate
    if firsthastime:
        start = parse_datetime(firstpart, delimiter=datetimedelimiter)
    else:
        start = parse_date(firstpart)

    if secondhastime:
        end = parse_datetime(secondpart, delimiter=datetimedelimiter)
    else:
        end = parse_date(secondpart)

    return (start, end)