def add_files(self, data_type, file_list):
    """
    Add files to the database

    Every entry of file_list is converted into a DataFile row and the rows
    are inserted in batches of 50 while self._mutex is held. New rows always
    start with remote_status NOT_PRESENT and a local_size of 0.

    Parameters:
        data_type (str): the data_type of the new files
        file_list (list): a list of dictionaries in the format
            local_path (str): path to the file,
            case (str): the case these files belong to
            name (str): the filename
            local_status (int): the local status of the file, optional,
                defaults to FileStatus.NOT_PRESENT.value
            remote_path (str): the remote path of these files, optional
            transfer_type (str): the transfer type of these files, optional,
                defaults to 'local'
            year (int): the year of the file, optional, defaults to 0
            month (int): the month of the file, optional, defaults to 0
            remote_uuid (str): remote globus endpoint id, optional
            remote_hostname (str): remote hostname for sftp transfer, optional
    Raises:
        KeyError: if an entry is missing 'name', 'local_path' or 'case'
    """
    # the with-statement releases the lock on every exit path, including
    # exceptions raised while building or inserting the rows
    with self._mutex:
        not_present = FileStatus.NOT_PRESENT.value
        new_files = [
            {
                'name': entry['name'],
                'local_path': entry['local_path'],
                'local_status': entry.get('local_status', not_present),
                'datatype': data_type,
                'case': entry['case'],
                'year': entry.get('year', 0),
                'month': entry.get('month', 0),
                'remote_uuid': entry.get('remote_uuid', ''),
                'remote_hostname': entry.get('remote_hostname', ''),
                'remote_path': entry.get('remote_path', ''),
                'remote_status': not_present,
                'local_size': 0,
                'transfer_type': entry.get('transfer_type', 'local'),
            }
            for entry in file_list
        ]
        # insert in fixed-size batches rather than one large statement
        step = 50
        for idx in range(0, len(new_files), step):
            DataFile.insert_many(new_files[idx:idx + step]).execute()
    def populate_file_list(self):
        """
        Populate the database with the required DataFile entries

        Walks every case in self._config['simulations'] (the 'start_year',
        'end_year' and 'comparisons' keys are not cases and are skipped) and
        every data type in self._config['data_types'] that is enabled for the
        case, either explicitly or through 'all'. Monthly data types get one
        entry per month for each year from start_year through end_year; any
        other data type gets a single entry with year and month set to 0.

        The directory holding the first local file of each case/data type
        pair is created if it does not exist, and the entries are inserted
        in batches of 50 inside a single database transaction.
        """
        msg = 'Creating file table'
        print_line(
            line=msg,
            event_list=self._event_list)
        start_year = int(self._config['simulations']['start_year'])
        end_year = int(self._config['simulations']['end_year'])
        not_present = FileStatus.NOT_PRESENT.value
        step = 50
        with DataFile._meta.database.atomic():
            # for each case
            for case in self._config['simulations']:
                if case in ['start_year', 'end_year', 'comparisons']:
                    continue
                case_config = self._config['simulations'][case]
                # for each data type
                for _type in self._config['data_types']:
                    data_types_for_case = case_config['data_types']
                    if ('all' not in data_types_for_case
                            and _type not in data_types_for_case):
                        continue

                    # setup the base local_path
                    local_path = self.render_file_string(
                        data_type=_type,
                        data_type_option='local_path',
                        case=case)

                    # each period holds the extra keyword arguments used to
                    # render one file's name and remote path
                    monthly = self._config['data_types'][_type].get('monthly')
                    if monthly in ('True', 'true', '1', 1):
                        # handle monthly data
                        periods = [
                            {'year': year, 'month': month}
                            for year in range(start_year, end_year + 1)
                            for month in range(1, 13)]
                    else:
                        # handle one-off data: a single file rendered
                        # without a year or month
                        periods = [{}]

                    new_files = list()
                    for period in periods:
                        filename = self.render_file_string(
                            data_type=_type,
                            data_type_option='file_format',
                            case=case,
                            **period)
                        r_path = self.render_file_string(
                            data_type=_type,
                            data_type_option='remote_path',
                            case=case,
                            **period)
                        new_files.append({
                            'name': filename,
                            'remote_path': os.path.join(r_path, filename),
                            'local_path': os.path.join(local_path, filename),
                            'local_status': not_present,
                            'case': case,
                            'remote_status': not_present,
                            'year': period.get('year', 0),
                            'month': period.get('month', 0),
                            'datatype': _type,
                            'local_size': 0,
                            'transfer_type': case_config['transfer_type'],
                            'remote_uuid': case_config.get('remote_uuid', ''),
                            'remote_hostname': case_config.get('remote_hostname', '')
                        })

                    if not new_files:
                        # a monthly data type with start_year > end_year
                        # produces no files, so there is no directory to
                        # create and nothing to insert
                        continue

                    tail, _ = os.path.split(new_files[0]['local_path'])
                    if not os.path.exists(tail):
                        os.makedirs(tail)
                    for idx in range(0, len(new_files), step):
                        DataFile.insert_many(
                            new_files[idx: idx + step]).execute()

            msg = 'Database update complete'
            print_line(msg, self._event_list)
Exemple #3
0
    def populate_file_list(self, simstart, simend, experiment):
        """
        Populate the database with the required DataFile entries

        Types in self.types that have no entry in file_type_map are skipped.
        The 'rest', MPAS namelist/streams and 'meridionalHeatTransport' types
        are delegated to their dedicated populate_* helpers. Every other type
        gets one entry per month for each year from simstart through simend,
        with its local directory created if it does not exist. All collected
        entries are then inserted in batches of 50 while self.mutex is held.

        Parameters:
            simstart (int): the start year of the simulation,
            simend (int): the end year of the simulation,
            experiment (str): the name of the experiment
                ex: 20170915.beta2.A_WCYCL1850S.ne30_oECv3_ICG.edison
        """
        # single-argument print(...) produces the same output on Python 2
        # and Python 3
        print('Creating file table')
        if self.sta:
            print('Using short term archive')
        else:
            print('Short term archive turned off')
        if not self.start_year:
            self.start_year = simstart
        newfiles = []
        mpas_types = (
            'streams.ocean', 'streams.cice', 'mpas-o_in', 'mpas-cice_in')
        with DataFile._meta.database.atomic():
            for _type in self.types:
                if _type not in file_type_map:
                    continue
                # the return values of the populate_* helpers are not used
                # here; presumably they append to newfiles in place
                # (TODO confirm against the helpers)
                if _type == 'rest':
                    self.populate_handle_rest(simstart, newfiles)
                    continue
                if _type in mpas_types:
                    self.populate_handle_mpas(_type, newfiles)
                    continue
                if _type == 'meridionalHeatTransport':
                    self.populate_heat_transport(newfiles)
                    continue

                local_base = os.path.join(self.local_path, _type)
                if not os.path.exists(local_base):
                    os.makedirs(local_base)

                # the name template and remote directory do not depend on
                # the year or month, so build them once per type
                template = file_type_map[_type]
                if _type == 'atm':
                    template = template.replace('EXPERIMENT', experiment)
                if self.sta:
                    remote_base = os.path.join(
                        self.remote_path, 'archive', _type, 'hist')
                else:
                    remote_base = self.remote_path

                for year in range(simstart, simend + 1):
                    yearstr = '{0:04d}'.format(year)
                    for month in range(1, 13):
                        monthstr = '{0:02d}'.format(month)
                        name = template.replace('YEAR', yearstr)
                        name = name.replace('MONTH', monthstr)
                        newfiles = self._add_file(
                            newfiles=newfiles,
                            name=name,
                            local_path=os.path.join(local_base, name),
                            remote_path=os.path.join(remote_base, name),
                            _type=_type,
                            year=year,
                            month=month)
            print('Inserting file data into the table')
            # the with-statement always releases the lock taken here,
            # without consulting locked(), which reports whether any thread
            # holds the lock
            with self.mutex:
                try:
                    step = 50
                    for idx in range(0, len(newfiles), step):
                        DataFile.insert_many(
                            newfiles[idx:idx + step]).execute()
                except Exception as e:
                    # NOTE(review): insert failures are only reported through
                    # print_debug and the completion message below is still
                    # printed; kept as-is because callers may rely on this
                    # method not raising
                    print_debug(e)
            print('Database update complete')