Ejemplo n.º 1
0
 def test_get_facility(self):
     """Check facility parsing of a valid name and the error for a bad one."""
     valid_name = 'IMOS_ANMN-NRS_CDEKOSTUZ_20121113T001841Z_NRSMAI_FV01_Profile-SBE-19plus.nc'
     facility = FileClassifier._get_facility(valid_name)
     self.assertEqual(facility, ('ANMN', 'NRS'))

     # This name must be rejected with the "missing sub-facility" message.
     expected_message = 'Missing sub-facility in file name'
     with self.assertRaisesRegex(InvalidFileNameError, expected_message):
         FileClassifier._get_facility('IMOS_NO_SUB_FACILITY.nc')
Ejemplo n.º 2
0
    def dest_path(self, src_file):
        """
        Build the destination path (directories plus file name) for src_file.

        The project returned by def_project() selects the layout:
          IMOS          : IMOS/<facility>/<sub-facility>/<callsign>_<ship name>/<year>/<cruise_id>
          SOOP-CO2_RT   : IMOS/<facility>/<sub-facility>/<callsign>_<ship name>/REALTIME/<year>/<month>
          FutureReefMap : Future_Reef_MAP/underway/<ship name>/<year>/<cruise_id>
        Year, month and cruise id are read from the NetCDF global attributes.

        :param src_file: name (or path) of the file to classify
        :return: destination directory joined with the base name of src_file
        :raises InvalidFileNameError: if the project is not in VALID_PROJECT or
            the vessel callsign in the file name is not a known callsign
        """
        project = def_project(src_file)
        if project not in VALID_PROJECT:
            message = ("Invalid project name '{project}'. "
                       "Project should be IMOS, SOOP-CO2_RT or Future_Reef_MAP")
            raise InvalidFileNameError(message.format(project=project))

        is_imos = project == 'IMOS'
        is_realtime = project == 'SOOP-CO2_RT'
        is_future_reef = project == 'FutureReefMap'
        path_parts = []

        if is_imos or is_realtime:
            name_fields = FileClassifier._get_file_name_fields(src_file)
            callsign = name_fields[4]
            if callsign not in self.ship_callsign_ls:
                raise InvalidFileNameError(
                    "Missing vessel callsign in file name '{name}'.".format(
                        name=src_file))

            sub_facility = name_fields[1]
            vessel_dir = "{ship_code}_{ship_name}".format(
                ship_code=callsign,
                ship_name=self.ship_callsign_ls[callsign])
            # The facility is the first four characters of the sub-facility field.
            path_parts += ['IMOS', sub_facility[:4], sub_facility, vessel_dir]

        if is_future_reef:
            name_fields = FileClassifier._get_file_name_fields(src_file,
                                                               min_fields=5)
            callsign = name_fields[3]
            if callsign not in self.ship_callsign_ls:
                raise InvalidFileNameError(
                    "Missing vessel callsign in file name '{name}'.".format(
                        name=src_file))

            path_parts += ['Future_Reef_MAP', 'underway',
                           self.ship_callsign_ls[callsign]]

        if is_imos or is_future_reef:
            # Delayed-mode layouts end with <year>/<cruise_id>.
            attributes = FileClassifier._get_nc_att(
                src_file, ['cruise_id', 'time_coverage_start'])
            path_parts += [attributes[1][:4], attributes[0]]

        if is_realtime:
            # Realtime layout ends with REALTIME/<year>/<month without leading zero>.
            start = FileClassifier._get_nc_att(src_file,
                                               'time_coverage_start')
            path_parts += ['REALTIME', start[:4], start[5:7].lstrip('0')]

        return os.path.join(FileClassifier._make_path(path_parts),
                            os.path.basename(src_file))
Ejemplo n.º 3
0
def get_deployment_id(src_file, ship_code):
    """
    Harmonise the way the ship name is written in deployment codes.

    The 'deployment_id' global attribute is read from the file and split on
    underscores. When it has three or four parts, the leading ship part is
    replaced by the name registered for ship_code in the callsign list:
      - three parts ending in a 'yyyymmdd-yyyymmdd' range -> <ship name>_<date range>
      - any other three- or four-part id -> <ship name>_<second-to-last part>_<last part>
    Any other number of parts returns the attribute unchanged.
    _get_nc_att is expected to raise an exception if the attribute is missing.

    :param src_file: object whose src_path attribute points to the NetCDF file
    :param ship_code: vessel callsign used to look up the ship name
    :return: deployment_id
    """
    raw_id = FileClassifier._get_nc_att(src_file.src_path, 'deployment_id')
    parts = raw_id.split('_')
    callsigns = ship_callsign_list()

    if len(parts) not in (3, 4):
        return raw_id

    last_part = parts[-1]
    if len(parts) == 3 and re.match('[0-9]{8}-[0-9]{8}', last_part):
        return "{ship_name}_{dateend}".format(
            ship_name=callsigns[ship_code], dateend=last_part)

    return "{ship_name}_{dateend}_{frequency}".format(
        ship_name=callsigns[ship_code],
        dateend=parts[-2],
        frequency=last_part)
Ejemplo n.º 4
0
    def archive_path(self, src_file):
        """
        Generate the archive path for a realtime (RT) file based on its vessel code.

        The file name is expected to look like <Vessel_code>_yyyy-ddd-hhmmdat.txt
            eg: IN_2017-165-0000dat.txt
        where ddd is the day of the year (001-365, or 366 in a leap year).

        :param src_file: path of the RT file; only its base name is parsed
        :return: relative archive path, including the file name
            eg: 'IMOS/SOOP/SOOP-CO2/VLMJ_Investigator/REALTIME/2018/1/IN_2018-022-0000dat.txt'
        :raises InvalidFileNameError: if the vessel code is not in VESSEL_CODE,
            or if the year / day-of-year part of the name is not numeric, is
            earlier than 2017, or is not an existing day of that year
        """
        file_name = os.path.basename(src_file)
        fields = FileClassifier._get_file_name_fields(file_name, min_fields=2)
        if fields[0] not in VESSEL_CODE:
            raise InvalidFileNameError(
                "File {file} has an invalid vessel code or is not a valid SOOP-CO2 realtime file"
                .format(file=file_name))
        ship_code = VESSEL_CODE[fields[0]]
        platform = "{ship_code}_{ship_name}".format(
            ship_code=ship_code, ship_name=self.ship_callsign_ls[ship_code])

        date_error = "Failed extracting valid [year, day] from file {file}".format(
            file=file_name)
        try:
            year = int(fields[1][:4])
            jday = int(fields[1][5:8])
        except ValueError:
            # Non-numeric year/day text is a file-name problem, not a crash.
            raise InvalidFileNameError(date_error)

        if year < 2017:
            raise InvalidFileNameError(date_error)

        # Day 0, or day 366 of a non-leap year, would silently resolve to a
        # date in the neighbouring year and yield the wrong month directory,
        # so the day is checked against the real length of the year.
        days_in_year = datetime.date(year, 12, 31).timetuple().tm_yday
        if not 1 <= jday <= days_in_year:
            raise InvalidFileNameError(date_error)

        # Determine month from day of year. Leap year taken into account
        year_to_ordinal = datetime.date(year, 1, 1).toordinal() + jday - 1
        month = datetime.date.fromordinal(year_to_ordinal).month

        dir_list = ['IMOS', 'SOOP', 'SOOP-CO2', platform, 'REALTIME',
                    year, month, file_name]
        return FileClassifier._make_path(dir_list)
Ejemplo n.º 5
0
    def test_get_nc_att(self):
        """Exercise _get_nc_att with a single name, a default, a list of names,
        time parsing and a missing attribute."""
        global_attributes = {
            'site_code': 'TEST1',
            'title': 'Test file',
            'time_start': '2017-09-01T01:02:03Z'
        }
        make_test_file(self.testfile, global_attributes)
        get_att = FileClassifier._get_nc_att

        # Single attribute name returns its value.
        self.assertEqual(get_att(self.testfile, 'site_code'), 'TEST1')
        # A missing attribute falls back to the supplied default.
        self.assertEqual(get_att(self.testfile, 'missing', ''), '')
        # A list of names returns the values in the same order.
        self.assertEqual(get_att(self.testfile, ['site_code', 'title']),
                         ['TEST1', 'Test file'])
        # time_format=True converts the attribute to a datetime.
        parsed_time = get_att(self.testfile, 'time_start', time_format=True)
        self.assertEqual(parsed_time, datetime(2017, 9, 1, 1, 2, 3))

        # Without a default, a missing attribute is an error.
        with self.assertRaises(InvalidFileContentError):
            get_att(self.testfile, 'missing')
Ejemplo n.º 6
0
def dest_path_soop_ba(src_file):
    """
    Build the destination directory path from the file name and deployment id.

    Layout: <project>/<facility>/<sub-facility>/<callsign>_<ship name>/<deployment_id>
    where the project, sub-facility and callsign are fields 0, 1 and 4 of the
    file name and the facility is the first four characters of the sub-facility.

    :param src_file: object providing the file name as its name attribute
    :return: path built by FileClassifier._make_path
    :raises InvalidFileNameError: if the callsign is not a known vessel callsign
    """
    name_fields = FileClassifier._get_file_name_fields(src_file.name)
    callsign = name_fields[4]
    vessels = ship_callsign_list()

    if callsign not in vessels:
        raise InvalidFileNameError(
            "Missing vessel callsign in file name '{name}'.".format(
                name=src_file.name))

    sub_facility = name_fields[1]
    vessel_dir = "{ship_code}_{ship_name}".format(
        ship_code=callsign, ship_name=vessels[callsign])
    path_parts = [
        name_fields[0],
        sub_facility[:4],
        sub_facility,
        vessel_dir,
        get_deployment_id(src_file, callsign),
    ]
    return FileClassifier._make_path(path_parts)
Ejemplo n.º 7
0
 def test_get_file_name_fields(self):
     """Round-trip several valid field lists and check the min_fields error."""
     valid_field_lists = [
         [
             'IMOS', 'ANMN-NRS', 'CDEKOSTUZ', '20121113T001841Z', 'NRSMAI',
             'FV01', 'Profile-SBE-19plus'
         ],
         [
             'IMOS', 'ANMN-NRS', 'ACESTZ', '20140507T000300Z', 'NRSKAI',
             'FV02', 'NRSKAI-1405-NXIC-CTD-36.12-burst-averaged',
             'END-20141028T230300Z', 'C-20160202T020400Z'
         ],
         ['IMOS', 'ANMN-NRS', '20110203', 'NRSPHB', 'FV01', 'LOGSHT'],
     ]
     # Joining the fields with underscores and parsing the name must give
     # the same fields back.
     for fields in valid_field_lists:
         filename = '_'.join(fields) + '.nc'
         self.assertEqual(FileClassifier._get_file_name_fields(filename),
                          fields)

     expected_message = 'has less than 4 fields in file name'
     with self.assertRaisesRegex(InvalidFileNameError, expected_message):
         FileClassifier._get_file_name_fields('bad_file_name', min_fields=4)
Ejemplo n.º 8
0
def archive_path_soop_ba(src_file):
    """
    Define the archive path from the file name and the deployment id.

    Layout: <project>/<facility>/<sub-facility>/raw/<callsign>_<ship name>/<deployment_id>
    where the project, sub-facility and callsign are fields 0, 1 and 4 of the
    file name and the facility is the first four characters of the sub-facility.

    :param src_file: object providing the file name as its name attribute
    :return: path built by FileClassifier._make_path
    :raises InvalidFileNameError: if the callsign is not a known vessel callsign
    """
    name_fields = FileClassifier._get_file_name_fields(src_file.name)
    callsign = name_fields[4]
    vessels = ship_callsign_list()

    if callsign not in vessels:
        raise InvalidFileNameError(
            "Missing vessel callsign in file name '{name}'.".format(
                name=src_file.name))

    sub_facility = name_fields[1]
    vessel_dir = "{ship_code}_{ship_name}".format(
        ship_code=callsign, ship_name=vessels[callsign])
    path_parts = [
        name_fields[0],
        sub_facility[:4],
        sub_facility,
        'raw',
        vessel_dir,
        get_deployment_id(src_file, callsign),
    ]
    return FileClassifier._make_path(path_parts)
Ejemplo n.º 9
0
def def_project(src_file):
    """
    Define the project name from the file name of a NetCDF file.

    Differentiates IMOS SOOP-CO2 files whose name contains 'FV00' from the
    other IMOS and Future_Reef_MAP files
    eg : IMOS_SOOP-CO2_GST_20121027T045200Z_VLHJ_FV01.nc
         FutureReefMap_GST_20140530T185029Z_9V2768_FV01.nc
         <Project_Name>_<<Facility-Code>>_<Data-Code>_<Start-date>_<Platform-Code>_FV<File-Version>.nc

    Files that do not end in '.nc' (such as the realtime text files
    <Vessel_code>_yyyy-ddd-hhmmdat.txt) are rejected.

    :param src_file: file name; must have at least 5 underscore-separated fields
    :return: the second field with an '_RT' suffix (eg 'SOOP-CO2_RT') when the
        name contains 'FV00', otherwise the first field (eg 'IMOS')
    :raises InvalidFileFormatError: if src_file does not end with '.nc'
    """
    if not src_file.endswith('.nc'):
        raise InvalidFileFormatError(
            "'{name}'is not a valid RT CO2 data (*dat.txt) or not a NetCDF file."
            .format(name=src_file))

    name_fields = FileClassifier._get_file_name_fields(src_file, min_fields=5)
    if 'FV00' in src_file:
        # <Facility-Name> + '_RT' suffix
        return name_fields[1] + '_RT'
    # <Project-Name>
    return name_fields[0]
Ejemplo n.º 10
0
 def test_make_path(self):
     """_make_path must return a ``str`` for mixed plain/unicode components."""
     components = ['dir1', u'dir2', u'dir3']
     self.assertIsInstance(FileClassifier._make_path(components), str)
Ejemplo n.º 11
0
 def test_get_variable_names(self):
     """The variables written to the test file must all be reported back."""
     expected_names = {'PRES', 'TEMP', 'PSAL'}
     make_test_file(self.testfile, {}, PRES={}, TEMP={}, PSAL={})
     found_names = FileClassifier._get_variable_names(self.testfile)
     # Compare as sets: the order of the returned names is not checked.
     self.assertEqual(set(found_names), expected_names)
Ejemplo n.º 12
0
 def test_get_site_code(self):
     """The site_code global attribute must be returned unchanged."""
     site_code = 'TEST1'
     make_test_file(self.testfile, {'site_code': site_code})
     result = FileClassifier._get_site_code(self.testfile)
     self.assertEqual(result, site_code)