def test_pat_mm(self):
    "Test PAT_DD pattern: '01' and '31' must match, '32' and '00' must not"
    # NOTE(review): the method name says "mm" but the pattern exercised
    # here is PAT_DD -- confirm whether the name should be test_pat_dd.
    expectations = (
        ('01', True),
        ('31', True),
        ('32', False),
        ('00', False),
    )
    for date, expected in expectations:
        # Collapse the match result (match object or None) to a plain bool
        # so it can be compared against the expected flag.
        matched = bool(PAT_DD.match(date))
        self.assertEqual(matched, expected)
def hdfs_file(odir, name):
    """
    Return the full HDFS path for a time-stamped file, creating the
    odir/YYYY/MM/DD directory chain on HDFS along the way.

    :param odir: base HDFS directory; created if it is not a directory yet
    :param name: file name of the form YYYYMMDD_HHMM.ext; only the last
        '/'-separated component is inspected for the time stamp, while the
        returned path is built from the name exactly as given
    :return: os.path.join of odir/YYYY/MM/DD and name
    :raises Exception: if the leading stamp does not match PAT_YYYYMMDD, or
        its year, month or day part does not match PAT_YYYY, PAT_MM or PAT_DD
    """
    # each file is in form YYYYMMDD_HHMM.ext
    basename = name.split('/')[-1]
    stamp = basename.split('_')[0]
    year, month, day = stamp[:4], stamp[4:6], stamp[6:8]

    # Validate the whole stamp first, then each component, in that order.
    checks = (
        (PAT_YYYYMMDD, stamp, 'YYYYMMDD'),
        (PAT_YYYY, year, 'YYYY'),
        (PAT_MM, month, 'MM'),
        (PAT_DD, day, 'DD'),
    )
    for pattern, piece, label in checks:
        if not pattern.match(piece):
            raise Exception(
                "Given file name '%s' does not contain %s stamp"
                % (name, label))

    # Directory levels from the base dir down to the day dir, top-down so
    # that each parent is handled before its child.
    levels = [odir]
    for part in (year, month, day):
        levels.append(os.path.join(levels[-1], part))
    for level in levels:
        if not hdfs.path.isdir(level):
            hdfs.mkdir(level)
    return os.path.join(levels[-1], name)
def hdfs_file(odir, name):
    """
    Map a time-stamped file name onto the odir/YYYY/MM/DD layout on HDFS,
    create any directory of that layout which is not there yet and return
    the resulting full path of the file.

    :param odir: base HDFS directory; created if it is not a directory yet
    :param name: file name of the form YYYYMMDD_HHMM.ext; the stamp is read
        from the last '/'-separated component, the returned path uses the
        name exactly as given
    :return: os.path.join of odir/YYYY/MM/DD and name
    :raises Exception: if the stamp, or its year, month or day part, fails
        the corresponding PAT_YYYYMMDD, PAT_YYYY, PAT_MM or PAT_DD pattern
    """
    def ensure_dir(path):
        "Create path on HDFS unless it already is a directory; return path"
        if not hdfs.path.isdir(path):
            hdfs.mkdir(path)
        return path

    # each file is in form YYYYMMDD_HHMM.ext
    prefix = name.split('/')[-1].split('_')[0]
    if not PAT_YYYYMMDD.match(prefix):
        raise Exception(
            "Given file name '%s' does not contain YYYYMMDD stamp" % name)

    yyyy = prefix[:4]
    if not PAT_YYYY.match(yyyy):
        raise Exception(
            "Given file name '%s' does not contain YYYY stamp" % name)

    mm = prefix[4:6]
    if not PAT_MM.match(mm):
        raise Exception(
            "Given file name '%s' does not contain MM stamp" % name)

    dd = prefix[6:8]
    if not PAT_DD.match(dd):
        raise Exception(
            "Given file name '%s' does not contain DD stamp" % name)

    # Walk from the base dir down to the day dir, creating levels on demand.
    current = ensure_dir(odir)
    for component in (yyyy, mm, dd):
        current = ensure_dir(os.path.join(current, component))
    return os.path.join(current, name)