def dateformat(value):
    """
    Translate a date specification into seconds since epoch.

    Two spellings are accepted (case-insensitive, after str() conversion):

    - ``YYYYMMDD``: an absolute date; the result is the integer produced by
      calendar.timegm for midnight of that date (time tuple read as UTC).
    - ``<N>d``: N days back from now; the result is the float
      ``time.time() - N * 86400``.

    Any other input raises a plain Exception describing the supported
    formats. A ValueError from datetime.date may propagate if the pattern
    lets through a month/day pair that is not a real calendar date.
    """
    # Build the error text up-front from the *original* value so that the
    # reported type is the caller's type and not the normalized string.
    err = 'Unacceptable date format, value=%s, type=%s,' % (value, type(value))
    err += " supported format is YYYYMMDD or number with suffix 'd' for days"

    text = str(value).lower()

    # The absolute form must be tested first: it takes precedence over the
    # trailing-'d' form whenever the pattern matches.
    if PAT_YYYYMMDD.match(text):
        if len(text) != 8:
            raise Exception(err)
        yyyy = text[0:4]
        if not PAT_YYYY.match(yyyy):
            raise Exception(err + ', fail to parse the year part, %s' % yyyy)
        mm = text[4:6]
        dd = text[6:8]
        day = datetime.date(int(yyyy), int(mm), int(dd))
        return calendar.timegm(day.timetuple())

    if not text.endswith('d'):
        raise Exception(err)

    # Relative form: everything before the trailing 'd' must be an integer.
    try:
        ndays = int(text[:-1])
    except ValueError:
        raise Exception(err)
    return time.time() - ndays * 24 * 60 * 60
def dateformat(value):
    """
    Return seconds since epoch for a date given either as YYYYMMDD or as a
    number of days with suffix 'd' (meaning "that many days before now").

    :param value: date specification; it is converted with str() and
        lower-cased before parsing.
    :return: int from calendar.timegm for the YYYYMMDD form, float
        (time.time() minus the day offset) for the 'd' form.
    :raises Exception: when the value matches neither form, when the year
        part fails PAT_YYYY, or when the day count is not an integer.
        datetime.date may additionally raise ValueError for month/day
        values that do not form a valid calendar date.
    """
    seconds_per_day = 24 * 60 * 60

    # The message is formatted before normalization on purpose: it reports
    # the value and type exactly as the caller supplied them.
    msg = 'Unacceptable date format, value=%s, type=%s,' \
        % (value, type(value))
    msg += " supported format is YYYYMMDD or number with suffix 'd' for days"

    spec = str(value).lower()
    stamp_match = PAT_YYYYMMDD.match(spec)

    if not stamp_match:
        # Relative form, e.g. '7d'; anything else is rejected.
        if not spec.endswith('d'):
            raise Exception(msg)
        try:
            count = int(spec[:-1])
        except ValueError:
            raise Exception(msg)
        return time.time() - count * seconds_per_day

    # Absolute form: exactly eight characters split as YYYY, MM, DD.
    if len(spec) != 8:
        raise Exception(msg)
    year_part = spec[0:4]
    if not PAT_YYYY.match(year_part):
        raise Exception(msg + ', fail to parse the year part, %s' % year_part)
    month_part, day_part = spec[4:6], spec[6:8]
    parsed = datetime.date(int(year_part), int(month_part), int(day_part))
    return calendar.timegm(parsed.timetuple())
def hdfs_file(odir, name):
    """
    Build the HDFS location for a file under an odir/YYYY/MM/DD layout.

    The date is read from the file name: the last '/'-separated component
    is taken and everything before its first '_' is treated as a YYYYMMDD
    stamp (files are expected to look like YYYYMMDD_HHMM.ext). The stamp
    and its year, month and day parts are validated against PAT_YYYYMMDD,
    PAT_YYYY, PAT_MM and PAT_DD; an Exception is raised on the first
    mismatch. Afterwards odir and each nested date directory are created
    on HDFS when hdfs.path.isdir reports them missing.

    Returns os.path.join(odir/YYYY/MM/DD, name).

    NOTE(review): the full `name` is joined, not only its last component,
    so a name that carries a directory prefix keeps it in the result --
    confirm callers only pass bare file names.
    """
    def ensure(path):
        "Create path on HDFS unless it is already a directory"
        if not hdfs.path.isdir(path):
            hdfs.mkdir(path)

    # each file is in form YYYYMMDD_HHMM.ext
    basename = name.split('/')[-1]
    stamp = basename.split('_')[0]
    if not PAT_YYYYMMDD.match(stamp):
        raise Exception(
            "Given file name '%s' does not contain YYYYMMDD stamp" % name)

    yyyy, mm, dd = stamp[:4], stamp[4:6], stamp[6:8]

    # Validate the parts in year, month, day order; the first failure wins.
    checks = (
        (PAT_YYYY, yyyy, "Given file name '%s' does not contain YYYY stamp"),
        (PAT_MM, mm, "Given file name '%s' does not contain MM stamp"),
        (PAT_DD, dd, "Given file name '%s' does not contain DD stamp"),
    )
    for pattern, piece, template in checks:
        if not pattern.match(piece):
            raise Exception(template % name)

    # Create the directories top-down: odir, odir/YYYY, odir/YYYY/MM, ...
    ensure(odir)
    target = odir
    for piece in (yyyy, mm, dd):
        target = os.path.join(target, piece)
        ensure(target)
    return os.path.join(target, name)
def test_pat_yyyy(self):
    "Test PAT_YYYY pattern"
    # (candidate string, whether PAT_YYYY is expected to match it)
    cases = [
        ('2015', True),
        ('2123', False),
        ('20151', False),
        ('3015', False),
        ('1015', False),
    ]
    for date, expected in cases:
        # match() yields a match object or None; reduce it to a plain bool
        res = bool(PAT_YYYY.match(date))
        self.assertEqual(res, expected)
def hdfs_file(odir, name):
    """
    Given an HDFS directory and a file name, make sure the date-based
    directory tree odir/YYYY/MM/DD exists on HDFS and return the path
    obtained by joining that directory with the given name.

    The YYYYMMDD stamp is the part of the name's last '/' component that
    precedes its first '_' (names are expected in YYYYMMDD_HHMM.ext form).

    Raises Exception when the stamp, or its year, month or day part, does
    not match PAT_YYYYMMDD, PAT_YYYY, PAT_MM or PAT_DD respectively. No
    directory is created before all four checks have passed.

    NOTE(review): the returned path joins the complete `name`, including
    any directory prefix it may carry -- verify this is what callers want.
    """
    # each file is in form YYYYMMDD_HHMM.ext
    last_component = name.rsplit('/', 1)[-1]
    tstamp = last_component.partition('_')[0]

    if PAT_YYYYMMDD.match(tstamp) is None:
        raise Exception("Given file name '%s' does not contain YYYYMMDD stamp" % name)

    year = tstamp[:4]
    if PAT_YYYY.match(year) is None:
        raise Exception("Given file name '%s' does not contain YYYY stamp" % name)

    month = tstamp[4:6]
    if PAT_MM.match(month) is None:
        raise Exception("Given file name '%s' does not contain MM stamp" % name)

    day = tstamp[6:8]
    if PAT_DD.match(day) is None:
        raise Exception("Given file name '%s' does not contain DD stamp" % name)

    # Walk down from odir, creating every missing level before descending
    # into the next one.
    current = odir
    remaining = [year, month, day]
    while True:
        if not hdfs.path.isdir(current):
            hdfs.mkdir(current)
        if not remaining:
            break
        current = os.path.join(current, remaining.pop(0))
    return os.path.join(current, name)