def test_filemanager_update_local(self):
     sta = False
     types = ['atm', 'ice', 'ocn', 'rest', 'streams.cice', 'streams.ocean']
     database = 'test.db'
     simstart = 51
     simend = 60
     experiment = '20171011.beta2_FCT2-icedeep_branch.A_WCYCL1850S.ne30_oECv3_ICG.edison'
     filemanager = FileManager(mutex=self.mutex,
                               sta=sta,
                               types=types,
                               database=database,
                               remote_endpoint=self.remote_endpoint,
                               remote_path=self.remote_path,
                               local_endpoint=self.local_endpoint,
                               local_path=self.local_path)
     filemanager.populate_file_list(simstart=simstart,
                                    simend=simend,
                                    experiment=experiment)
     self.mutex.acquire()
     df = DataFile.select().limit(1)
     name = df[0].name
     head, tail = os.path.split(df[0].local_path)
     if not os.path.exists(head):
         os.makedirs(head)
     with open(df[0].local_path, 'w') as fp:
         fp.write('this is a test file')
     if self.mutex.locked():
         self.mutex.release()
     filemanager.update_local_status()
     self.mutex.acquire()
     df = DataFile.select().where(DataFile.name == name)[0]
     self.assertEqual(df.local_status, 0)
     self.assertTrue(df.local_size > 0)
     if self.mutex.locked():
         self.mutex.release()
    def __init__(self, mutex, event_list, config, database='processflow.db'):
        """
        Parameters:
            mutex (threading.Lock): the mutex for accessing the database
            event_list (EventList): the list to push information into
            database (str): the path to where to create the sqlite database file
            config (dict): the global configuration dict
        """
        self._mutex = mutex
        self._event_list = event_list
        self._db_path = database
        self._config = config

        if os.path.exists(database):
            os.remove(database)

        self._mutex.acquire()
        DataFile._meta.database.init(database)
        if DataFile.table_exists():
            DataFile.drop_table()

        DataFile.create_table()
        if self._mutex.locked():
            self._mutex.release()

        self.thread_list = list()
        self.kill_event = threading.Event()
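
The DataFile._meta.database.init(database) call above uses peewee's deferred-initialization pattern. A minimal, self-contained sketch of that pattern, assuming a one-field stand-in model rather than the real processflow schema:

from peewee import SqliteDatabase, Model, CharField

db = SqliteDatabase(None)  # deferred: the file path is supplied later

class DataFile(Model):
    name = CharField()

    class Meta:
        database = db

# Point the deferred database at a real file at startup, then rebuild
# the table from scratch, as the __init__ above does.
DataFile._meta.database.init('processflow.db')
db.connect()
if DataFile.table_exists():
    DataFile.drop_table()
DataFile.create_table()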
Example 3
def configDataFile(request):
    if request.method == 'POST':
        form = trainingDataForm(request.POST, request.FILES)
        if form.is_valid():
            fileNameValue = request.POST['FileName']
            newdoc = DataFile(dataFile=request.FILES['docfile'],
                              FileName=fileNameValue,
                              CreatedBy='chandan',
                              LastUpdated=timezone.now(),
                              Description='')
            newdoc.save()
            # Redirect to the document list after POST
            return HttpResponseRedirect(reverse('curiousWorkbench:configDataFile'))
    else:
        # An empty, unbound form for GET requests
        form = trainingDataForm()
    # Load the documents for the list page
    dataFileList = DataFile.objects.order_by('-LastUpdated')
    # Render the list page with the documents and the bound or unbound form
    return render_to_response(
        'curiousWorkbench/configDataFile.html',
        {'dataFileList': dataFileList, 'form': form},
        context_instance=RequestContext(request))
Example 4
File: insert.py Project: bfeng/dfs
    def post(self):
        key = self.request.get("key")
        value = self.request.get("value")

        filename = urllib.unquote(key)

        # Clean up current file
        query = DataFile.all().filter("f_key =", filename)

        for data_file in query:
            data_file.delete()

        # Create a file
        writable_file_name = files.blobstore.create(mime_type="application/octet-stream")

        with files.open(writable_file_name, "a") as f:
            f.write(value)
        files.finalize(writable_file_name)

        blob_key = files.blobstore.get_blob_key(writable_file_name)

        data_file = DataFile(f_key=filename, f_value=blob_key)
        data_file.put()

        if memcache.get(key="turn") == "on":
            if BlobInfo.get(blob_key).size <= 100000:
                memcache.set(key=filename, value=value, time=3600)

        write_boolean(self, True)
Example 5
  def post(self):
    key = self.request.get('key')
    value = self.request.get('value')

    data_file = DataFile(f_key=key, f_value=value)
    data_file.put()
    self.response.headers['Content-Type'] = 'text/json'
    self.response.out.write('{"type":"boolean", "value":"true"}')
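
An aside, not from the source: building the response body with the json module instead of a hand-written string avoids quoting and escaping mistakes.

import json

payload = {'type': 'boolean', 'value': 'true'}
print(json.dumps(payload))  # {"type": "boolean", "value": "true"}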
 def add_files(self, data_type, file_list):
     """
     Add files to the database
     
     Parameters:
         data_type (str): the data_type of the new files
         file_list (list): a list of dictionaries in the format
             local_path (str): path to the file,
             case (str): the case these files belong to
             name (str): the filename
             remote_path (str): the remote path of these files, optional
             transfer_type (str): the transfer type of these files, optional
             year (int): the year of the file, optional
             month (int): the month of the file, optional
             remote_uuid (str): remote globus endpoint id, optional
             remote_hostname (str): remote hostname for sftp transfer, optional
     """
     self._mutex.acquire()
     try:
         new_files = list()
         for file in file_list:
             new_files.append({
                 'name': file['name'],
                 'local_path': file['local_path'],
                 'local_status': file.get('local_status', FileStatus.NOT_PRESENT.value),
                 'datatype': data_type,
                 'case': file['case'],
                 'year': file.get('year', 0),
                 'month': file.get('month', 0),
                 'remote_uuid': file.get('remote_uuid', ''),
                 'remote_hostname': file.get('remote_hostname', ''),
                 'remote_path': file.get('remote_path', ''),
                 'remote_status': FileStatus.NOT_PRESENT.value,
                 'local_size': 0,
                 'transfer_type': file.get('transfer_type', 'local')
             })
         step = 50
         for idx in range(0, len(new_files), step):
             DataFile.insert_many(new_files[idx:idx + step]).execute()
     finally:
         self._mutex.release()
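
The chunked insert_many loop above works around SQLite's limit on bound variables per statement. A self-contained sketch of the same pattern against an in-memory database, using an illustrative Row model rather than the processflow schema:

from peewee import SqliteDatabase, Model, CharField, IntegerField

db = SqliteDatabase(':memory:')

class Row(Model):
    name = CharField()
    year = IntegerField()

    class Meta:
        database = db

db.connect()
db.create_tables([Row])

rows = [{'name': 'file_{:03d}.nc'.format(i), 'year': i} for i in range(130)]
step = 50  # insert in batches of 50 rows, as add_files does
for idx in range(0, len(rows), step):
    Row.insert_many(rows[idx:idx + step]).execute()

print(Row.select().count())  # 130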
 def write_database(self):
     """
     Write out a human readable version of the database for debug purposes
     """
     file_list_path = os.path.join(self._config['global']['project_path'],
                                   'output', 'file_list.txt')
     with open(file_list_path, 'w') as fp:
         self._mutex.acquire()
         try:
             for case in self._config['simulations']:
                 if case in ['start_year', 'end_year', 'comparisons']:
                     continue
                 fp.write('+++++++++++++++++++++++++++++++++++++++++++++')
                 fp.write('\n\t{case}\t\n'.format(case=case))
                 fp.write('+++++++++++++++++++++++++++++++++++++++++++++\n')
                 q = (DataFile.select(DataFile.datatype).where(
                     DataFile.case == case).distinct())
                 for df_type in q.execute():
                     _type = df_type.datatype
                     fp.write('===================================\n')
                     fp.write('\t' + _type + ':\n')
                     datafiles = (DataFile.select().where(
                         (DataFile.datatype == _type)
                         & (DataFile.case == case)))
                     for datafile in datafiles.execute():
                         filestr = '-------------------------------------'
                         filestr += '\n\t     name: ' + datafile.name + '\n\t     local_status: '
                         if datafile.local_status == 0:
                             filestr += ' present, '
                         elif datafile.local_status == 1:
                             filestr += ' missing, '
                         else:
                             filestr += ' in transit, '
                         filestr += '\n\t     remote_status: '
                         if datafile.remote_status == 0:
                             filestr += ' present'
                         elif datafile.remote_status == 1:
                             filestr += ' missing'
                         else:
                             filestr += ' in transit'
                         filestr += '\n\t     local_size: ' + \
                             str(datafile.local_size)
                         filestr += '\n\t     local_path: ' + datafile.local_path
                         filestr += '\n\t     remote_path: ' + datafile.remote_path + '\n'
                         fp.write(filestr)
         except Exception as e:
             print_debug(e)
         finally:
             if self._mutex.locked():
                 self._mutex.release()
    def report_files_local(self):
        """
        Return a string in the format 'X/Y files available locally or Z%',
        where X is the number of files present locally and Y is the total expected
        """
        q = (DataFile.select(DataFile.local_status).where(
            DataFile.local_status == FileStatus.PRESENT.value))
        local = len([x.local_status for x in q.execute()])

        q = (DataFile.select(DataFile.local_status))
        total = len([x.local_status for x in q.execute()])

        prec = ((local * 1.0) / total) * 100 if total else 0.0
        msg = '{local}/{total} files available locally or {prec:.2f}%'.format(
            local=local, total=total, prec=prec)
        return msg
 def test_filemanager_update_remote_no_sta(self):
     sta = False
     types = ['atm', 'ice', 'ocn', 'rest', 'streams.cice', 'streams.ocean']
     database = 'test.db'
     simstart = 51
     simend = 60
     experiment = '20171011.beta2_FCT2-icedeep_branch.A_WCYCL1850S.ne30_oECv3_ICG.edison'
     filemanager = FileManager(mutex=self.mutex,
                               sta=sta,
                               types=types,
                               database=database,
                               remote_endpoint=self.remote_endpoint,
                               remote_path=self.remote_path,
                               local_endpoint=self.local_endpoint,
                               local_path=self.local_path)
     filemanager.populate_file_list(simstart=simstart,
                                    simend=simend,
                                    experiment=experiment)
     client = get_client()
     filemanager.update_remote_status(client)
     self.mutex.acquire()
     for datafile in DataFile.select():
         self.assertEqual(datafile.remote_status, 0)
     if self.mutex.locked():
         self.mutex.release()
     self.assertTrue(filemanager.all_data_remote())
 def test_filemanager_update_remote_yes_sta(self):
     sta = True
     types = ['atm', 'ice', 'ocn', 'streams.cice', 'streams.ocean']
     database = 'test.db'
     simstart = 51
     source_path = '/global/cscratch1/sd/golaz/ACME_simulations/20170915.beta2.A_WCYCL1850S.ne30_oECv3_ICG.edison'
     simend = 60
     experiment = '20170915.beta2.A_WCYCL1850S.ne30_oECv3_ICG.edison'
     filemanager = FileManager(mutex=self.mutex,
                               sta=sta,
                               types=types,
                               database=database,
                               remote_endpoint=self.remote_endpoint,
                               remote_path=source_path,
                               local_endpoint=self.local_endpoint,
                               local_path=self.local_path)
     filemanager.populate_file_list(simstart=simstart,
                                    simend=simend,
                                    experiment=experiment)
     client = get_client()
     filemanager.update_remote_status(client)
     self.mutex.acquire()
     for datafile in DataFile.select():
         if datafile.remote_status != 0:
             print(datafile.name, datafile.remote_path)
         self.assertEqual(datafile.remote_status, 0)
     if self.mutex.locked():
         self.mutex.release()
     self.assertTrue(filemanager.all_data_remote())
    def test_filemanager_populate(self):
        sta = False
        types = ['atm', 'ice', 'ocn', 'rest', 'streams.cice', 'streams.ocean']
        database = 'test.db'
        simstart = 51
        simend = 60
        experiment = '20171011.beta2_FCT2-icedeep_branch.A_WCYCL1850S.ne30_oECv3_ICG.edison'
        filemanager = FileManager(mutex=self.mutex,
                                  sta=sta,
                                  types=types,
                                  database=database,
                                  remote_endpoint=self.remote_endpoint,
                                  remote_path=self.remote_path,
                                  local_endpoint=self.local_endpoint,
                                  local_path=self.local_path)
        filemanager.populate_file_list(simstart=simstart,
                                       simend=simend,
                                       experiment=experiment)

        simlength = simend - simstart + 1
        atm_file_names = [
            x.name for x in DataFile.select().where(DataFile.datatype == 'atm')
        ]
        self.assertTrue(len(atm_file_names) == (simlength * 12))

        for year in range(simstart, simend + 1):
            for month in range(1, 13):
                name = '{exp}.cam.h0.{year:04d}-{month:02d}.nc'.format(
                    exp=experiment, year=year, month=month)
                self.assertTrue(name in atm_file_names)
Example 12
    def update_local_status(self):
        """
        Update the database with the local status of the expected files

        Parameters:
            types (list(str)): the list of files types to expect, must be members of file_type_map
        """

        self.mutex.acquire()
        try:
            datafiles = DataFile.select().where(
                DataFile.local_status == filestatus['NOT_EXIST'])
            for datafile in datafiles:
                should_save = False
                if os.path.exists(datafile.local_path):
                    local_size = os.path.getsize(datafile.local_path)
                    if local_size == datafile.remote_size:
                        datafile.local_status = filestatus['EXISTS']
                        datafile.local_size = local_size
                        should_save = True
                    if local_size != datafile.local_size \
                            or should_save:
                        datafile.local_size = local_size
                        datafile.save()
        except Exception as e:
            print_debug(e)
        finally:
            if self.mutex.locked():
                self.mutex.release()
Example 13
    def _handle_transfer(self, transfer, event, event_list):
        self.active_transfers += 1
        event_list.push(message='Starting file transfer')
        transfer.execute(event)
        print('Transfer complete')
        self.active_transfers -= 1

        if transfer.status == JobStatus.FAILED:
            message = "Transfer has failed"
            print(message)
            logging.error(message)
            event_list.push(message='Transfer failed')
            return

        self.mutex.acquire()
        names = [x['name'] for x in transfer.file_list]
        for datafile in DataFile.select().where(DataFile.name << names):
            if os.path.exists(datafile.local_path) \
                    and os.path.getsize(datafile.local_path) == datafile.remote_size:
                datafile.local_status = filestatus['EXISTS']
                datafile.local_size = os.path.getsize(datafile.local_path)
            else:
                print('file transfer error on {}'.format(datafile.name))
                datafile.local_status = filestatus['NOT_EXIST']
                datafile.local_size = 0
            datafile.save()
        if self.mutex.locked():
            self.mutex.release()
Example 14
 def print_db(self):
     self.mutex.acquire()
     for df in DataFile.select():
         print({
             'name': df.name,
             'local_path': df.local_path,
             'remote_path': df.remote_path
         })
     self.mutex.release()
Example 15
File: listing.py Project: bfeng/dfs
  def post(self):
    data_files = DataFile.all()

    result = {"type":"array", "value":[]}
    for data_file in data_files:
      result["value"].append(data_file.f_key)

    write_json(self, result)
Example 16
File: check.py Project: bfeng/dfs
    def post(self):
        key = urllib.unquote(self.request.get("key"))

        query = DataFile.gql("WHERE f_key = :1", key)
        if query.count() >= 1:
            write_boolean(self, True)
        else:
            write_boolean(self, False)
Example 17
 def print_db(self):
     for df in DataFile.select():
         print({
             'case': df.case,
             'type': df.datatype,
             'name': df.name,
             'local_path': df.local_path,
             'remote_path': df.remote_path,
             'transfer_type': df.transfer_type,
         })
Example 18
    def __init__(self, database, types, sta=False, **kwargs):
        """
        Parameters:
            mutex (threading.Lock): the mutex for accessing the database
            sta (bool): whether this run uses short term archiving
            types (list(str)): a list of strings of datatypes
            database (str): the path to where to create the sqlite database file
            remote_endpoint (str): the Globus UUID for the remote endpoint
            remote_path (str): the base directory to search for this run's model output
            local_endpoint (str): the Globus UUID for the local endpoint
            local_path (str): the local project path
        """
        self.mutex = kwargs['mutex']
        self.sta = sta
        self.updated_rest = False
        self.types = types if isinstance(types, list) else [types]
        self.active_transfers = 0
        self.db_path = database
        self.mutex.acquire()
        DataFile._meta.database.init(database)
        if DataFile.table_exists():
            DataFile.drop_table()
        DataFile.create_table()
        if self.mutex.locked():
            self.mutex.release()
        self.remote_endpoint = kwargs.get('remote_endpoint')
        self.local_path = kwargs.get('local_path')
        self.local_endpoint = kwargs.get('local_endpoint')
        self.start_year = 0

        head, tail = os.path.split(kwargs.get('remote_path'))
        if not self.sta:
            if tail != 'run':
                self.remote_path = os.path.join(kwargs.get('remote_path'),
                                                'run')
            else:
                self.remote_path = kwargs.get('remote_path')
        else:
            if tail == 'run':
                self.remote_path = head
            else:
                self.remote_path = kwargs.get('remote_path')
Example 19
 def get_file_paths_by_year(self, start_year, end_year, _type):
     self.mutex.acquire()
     try:
         if _type in [
                 'rest', 'streams.ocean', 'streams.cice', 'mpas-cice_in',
                 'mpas-o_in', 'meridionalHeatTransport'
         ]:
             datafiles = DataFile.select().where(DataFile.datatype == _type)
         else:
             datafiles = DataFile.select().where(
                 (DataFile.datatype == _type)
                 & (DataFile.year >= start_year)
                 & (DataFile.year <= end_year))
         files = [x.local_path for x in datafiles]
     except Exception as e:
         print_debug(e)
         files = []
     finally:
         if self.mutex.locked():
             self.mutex.release()
     return files
 def print_db(self):
     self._mutex.acquire()
     for df in DataFile.select():
         print({
             'case': df.case,
             'type': df.datatype,
             'name': df.name,
             'local_path': df.local_path,
             'remote_path': df.remote_path,
             'transfer_type': df.transfer_type,
         })
     self._mutex.release()
Example 21
File: remove.py Project: bfeng/dfs
  def post(self):
    key = urllib.unquote(self.request.get('key'))

    query = DataFile.all().filter('f_key =', key)
    data_file = query.get()
    if data_file is None:
      write_boolean(self, False)
    else:
      if data_file.f_value:
          data_file.f_value.delete()
      data_file.delete()
      write_boolean(self, True)
Example 22
 def all_data_remote(self):
     self.mutex.acquire()
     try:
         for data in DataFile.select():
             if data.remote_status != filestatus['EXISTS']:
                 return False
     except Exception as e:
         print_debug(e)
     finally:
         if self.mutex.locked():
             self.mutex.release()
     return True
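
Note the early return False inside the try block above: a finally clause runs even when the try returns, so the mutex is still released. A self-contained illustration, not from the source:

import threading

lock = threading.Lock()

def locked_lookup():
    lock.acquire()
    try:
        return 'returned from inside try'
    finally:
        # runs even on the early return above
        if lock.locked():
            lock.release()

print(locked_lookup())  # returned from inside try
print(lock.locked())    # False: the lock was released on the way out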
Example 23
    def __init__(self, event_list, config, database='processflow.db'):
        """
        Parameters:
            event_list (EventList): the list to push information into
            database (str): the path to where to create the sqlite database file
            config (dict): the global configuration dict
        """
        self._event_list = event_list
        self._db_path = database
        self._config = config

        if os.path.exists(database):
            os.remove(database)

        DataFile._meta.database.init(database)
        if DataFile.table_exists():
            DataFile.drop_table()

        DataFile.create_table()

        self.thread_list = list()
        self.kill_event = threading.Event()
 def get_endpoints(self):
     """
     Return a list of remote globus endpoint UUIDs for all cases
     """
     self._mutex.acquire()
     q = (DataFile.select().where(DataFile.transfer_type == 'globus'))
     endpoints = list()
     for x in q.execute():
         if x.remote_uuid not in endpoints:
             endpoints.append(x.remote_uuid)
     self._mutex.release()
     return endpoints
Example 25
File: find.py Project: bfeng/dfs
  def post(self):
    key = urllib.unquote(self.request.get('key'))

    if memcache.get(key="turn") == 'on':
      data = memcache.get(key)
      if data is not None:
        write_string(self, data)
        return

    query = DataFile.all().filter('f_key =', key)

    data_file = query.get()

    if data_file:
      blob_reader = blobstore.BlobReader(data_file.f_value, buffer_size=1048576)
      for line in blob_reader:
        write_string(self, line)
    else:
      write_boolean(self, False)
    def update_local_status(self):
        """
        Update the database with the local status of the expected files

        Return True if there was new local data found, False othewise
        """
        self._mutex.acquire()
        try:
            query = (DataFile.select().where(
                (DataFile.local_status == FileStatus.NOT_PRESENT.value)
                | (DataFile.local_status == FileStatus.IN_TRANSIT.value)))
            printed = False
            change = False
            for datafile in query.execute():
                marked = False
                if os.path.exists(datafile.local_path):
                    if datafile.local_status in (FileStatus.NOT_PRESENT.value,
                                                 FileStatus.IN_TRANSIT.value):
                        datafile.local_status = FileStatus.PRESENT.value
                        marked = True
                        change = True
                else:
                    if datafile.transfer_type == 'local':
                        msg = '{case} transfer_type is local, but {filename} is not present'.format(
                            case=datafile.case, filename=datafile.name)
                        logging.error(msg)
                        if not printed:
                            print_line(msg, self._event_list)
                            printed = True
                    if datafile.local_status == FileStatus.PRESENT.value:
                        datafile.local_status = FileStatus.NOT_PRESENT.value
                        marked = True
                if marked:
                    datafile.save()
        except OperationalError:
            line = 'Error writing to database, database is locked by another process'
            print_line(line=line, event_list=self._event_list)
            logging.error(line)
        finally:
            if self._mutex.locked():
                self._mutex.release()
        return change
Example 27
    def years_ready(self, start_year, end_year):
        """
        Checks if atm files exist from start_year through end_year

        Parameters:
            start_year (int): the first year to start checking
            end_year (int): the last year to check for
        Returns:
            -1 if no data present
            0 if partial data present
            1 if all data present
        """
        data_ready = True
        non_zero_data = False

        self.mutex.acquire()
        try:
            datafiles = DataFile.select().where((DataFile.datatype == 'atm')
                                                & (DataFile.year >= start_year)
                                                & (DataFile.year <= end_year))
            for datafile in datafiles:
                if datafile.local_status in [
                        filestatus['NOT_EXIST'], filestatus['IN_TRANSIT']
                ]:
                    data_ready = False
                else:
                    non_zero_data = True
        except Exception as e:
            print_debug(e)
        finally:
            if self.mutex.locked():
                self.mutex.release()

        if data_ready:
            return 1
        elif not data_ready and non_zero_data:
            return 0
        elif not data_ready and not non_zero_data:
            return -1
    def test_filemanager_all_data_local(self):
        sta = True
        types = ['atm', 'ice', 'ocn', 'rest', 'streams.cice', 'streams.ocean']
        database = 'test.db'
        simstart = 51
        simend = 60
        experiment = '20171011.beta2_FCT2-icedeep_branch.A_WCYCL1850S.ne30_oECv3_ICG.edison'
        if os.path.exists(self.local_path):
            shutil.rmtree(self.local_path)
        filemanager = FileManager(mutex=self.mutex,
                                  sta=sta,
                                  types=types,
                                  database=database,
                                  remote_endpoint=self.remote_endpoint,
                                  remote_path=self.remote_path,
                                  local_endpoint=self.local_endpoint,
                                  local_path=self.local_path)
        filemanager.populate_file_list(simstart=simstart,
                                       simend=simend,
                                       experiment=experiment)
        filemanager.update_local_status()
        self.assertFalse(filemanager.all_data_local())

        self.mutex.acquire()
        for df in DataFile.select():
            name = df.name
            head, tail = os.path.split(df.local_path)
            if not os.path.exists(head):
                os.makedirs(head)
            with open(df.local_path, 'w') as fp:
                fp.write('this is a test file')
            size = os.path.getsize(df.local_path)
            df.remote_size = size
            df.local_size = size
            df.save()
        if self.mutex.locked():
            self.mutex.release()
        filemanager.update_local_status()
        self.assertTrue(filemanager.all_data_local())
 def all_data_local(self):
     """
     Returns True if all data is local, False otherwise
     """
     self._mutex.acquire()
     try:
         query = (DataFile.select().where(
             (DataFile.local_status == FileStatus.NOT_PRESENT.value)
             | (DataFile.local_status == FileStatus.IN_TRANSIT.value)))
         missing_data = query.execute()
         # if any of the data is missing, not all data is local
         if missing_data:
             logging.debug('All data is not local, missing the following')
             logging.debug([x.name for x in missing_data])
             return False
     except Exception as e:
         print_debug(e)
     finally:
         if self._mutex.locked():
             self._mutex.release()
     logging.debug('All data is local')
     return True
    def years_ready(self, data_type, start_year, end_year):
        """
        Checks if data_type files exist from start_year through end_year

        Parameters:
            start_year (int): the first year to start checking
            end_year (int): the last year to check for
        Returns:
            -1 if no data present
            0 if partial data present
            1 if all data present
        """
        data_ready = True
        non_zero_data = False

        self._mutex.acquire()
        try:
            query = (DataFile.select().where((DataFile.datatype == data_type)
                                             & (DataFile.year >= start_year)
                                             & (DataFile.year <= end_year)))
            for datafile in query.execute():
                if datafile.local_status != FileStatus.PRESENT.value:
                    data_ready = False
                else:
                    non_zero_data = True
        except Exception as e:
            print_debug(e)
        finally:
            if self._mutex.locked():
                self._mutex.release()

        if data_ready:
            return 1
        elif not data_ready and non_zero_data:
            return 0
        elif not data_ready and not non_zero_data:
            return -1
Example 31
    def populate_file_list(self, simstart, simend, experiment):
        """
        Populate the database with the required DataFile entries

        Parameters:
            simstart (int): the start year of the simulation,
            simend (int): the end year of the simulation,
            experiment (str): the name of the experiment
                ex: 20170915.beta2.A_WCYCL1850S.ne30_oECv3_ICG.edison
        """
        print('Creating file table')
        if self.sta:
            print('Using short term archive')
        else:
            print('Short term archive turned off')
        if not self.start_year:
            self.start_year = simstart
        newfiles = []
        with DataFile._meta.database.atomic():
            for _type in self.types:
                if _type not in file_type_map:
                    continue
                if _type == 'rest':
                    self.populate_handle_rest(simstart, newfiles)
                elif _type in [
                        'streams.ocean', 'streams.cice', 'mpas-o_in',
                        'mpas-cice_in'
                ]:
                    self.populate_handle_mpas(_type, newfiles)
                elif _type == 'meridionalHeatTransport':
                    self.populate_heat_transport(newfiles)
                else:
                    local_base = os.path.join(self.local_path, _type)
                    if not os.path.exists(local_base):
                        os.makedirs(local_base)
                    for year in range(simstart, simend + 1):
                        for month in range(1, 13):
                            if _type == 'atm':
                                name = file_type_map[_type].replace(
                                    'EXPERIMENT', experiment)
                            else:
                                name = file_type_map[_type]
                            yearstr = '{0:04d}'.format(year)
                            monthstr = '{0:02d}'.format(month)
                            name = name.replace('YEAR', yearstr)
                            name = name.replace('MONTH', monthstr)
                            local_path = os.path.join(local_base, name)
                            if self.sta:
                                remote_path = os.path.join(
                                    self.remote_path, 'archive', _type, 'hist',
                                    name)
                            else:
                                remote_path = os.path.join(
                                    self.remote_path, name)
                            newfiles = self._add_file(newfiles=newfiles,
                                                      name=name,
                                                      local_path=local_path,
                                                      remote_path=remote_path,
                                                      _type=_type,
                                                      year=year,
                                                      month=month)
            print('Inserting file data into the table')
            self.mutex.acquire()
            try:
                step = 50
                for idx in range(0, len(newfiles), step):
                    DataFile.insert_many(newfiles[idx:idx + step]).execute()
            except Exception as e:
                print_debug(e)
            finally:
                if self.mutex.locked():
                    self.mutex.release()
            print('Database update complete')
Example 32
    def populate_file_list(self):
        """
        Populate the database with the required DataFile entries
        """
        msg = 'Creating file table'
        print_line(
            line=msg,
            event_list=self._event_list)
        newfiles = list()
        start_year = int(self._config['simulations']['start_year'])
        end_year = int(self._config['simulations']['end_year'])
        with DataFile._meta.database.atomic():
            # for each case
            for case in self._config['simulations']:
                if case in ['start_year', 'end_year', 'comparisons']:
                    continue
                # for each data type
                for _type in self._config['data_types']:
                    data_types_for_case = self._config['simulations'][case]['data_types']
                    if 'all' not in data_types_for_case:
                        if _type not in data_types_for_case:
                            continue

                    # setup the base local_path
                    local_path = self.render_file_string(
                        data_type=_type,
                        data_type_option='local_path',
                        case=case)

                    new_files = list()
                    if self._config['data_types'][_type].get('monthly') in ['True', 'true', '1', 1]:
                        # handle monthly data
                        for year in range(start_year, end_year + 1):
                            for month in range(1, 13):
                                filename = self.render_file_string(
                                    data_type=_type,
                                    data_type_option='file_format',
                                    case=case,
                                    year=year,
                                    month=month)
                                r_path = self.render_file_string(
                                    data_type=_type,
                                    data_type_option='remote_path',
                                    case=case,
                                    year=year,
                                    month=month)
                                new_files.append({
                                    'name': filename,
                                    'remote_path': os.path.join(r_path, filename),
                                    'local_path': os.path.join(local_path, filename),
                                    'local_status': FileStatus.NOT_PRESENT.value,
                                    'case': case,
                                    'remote_status': FileStatus.NOT_PRESENT.value,
                                    'year': year,
                                    'month': month,
                                    'datatype': _type,
                                    'local_size': 0,
                                    'transfer_type': self._config['simulations'][case]['transfer_type'],
                                    'remote_uuid': self._config['simulations'][case].get('remote_uuid', ''),
                                    'remote_hostname': self._config['simulations'][case].get('remote_hostname', '')
                                })
                    else:
                        # handle one-off data
                        filename = self.render_file_string(
                            data_type=_type,
                            data_type_option='file_format',
                            case=case)
                        r_path = self.render_file_string(
                            data_type=_type,
                            data_type_option='remote_path',
                            case=case)
                        new_files.append({
                            'name': filename,
                            'remote_path': os.path.join(r_path, filename),
                            'local_path': os.path.join(local_path, filename),
                            'local_status': FileStatus.NOT_PRESENT.value,
                            'case': case,
                            'remote_status': FileStatus.NOT_PRESENT.value,
                            'year': 0,
                            'month': 0,
                            'datatype': _type,
                            'local_size': 0,
                            'transfer_type': self._config['simulations'][case]['transfer_type'],
                            'remote_uuid': self._config['simulations'][case].get('remote_uuid', ''),
                            'remote_hostname': self._config['simulations'][case].get('remote_hostname', '')
                        })
                    head, _ = os.path.split(new_files[0]['local_path'])
                    if not os.path.exists(head):
                        os.makedirs(head)
                    step = 50
                    for idx in range(0, len(new_files), step):
                        DataFile.insert_many(
                            new_files[idx: idx + step]).execute()

            msg = 'Database update complete'
            print_line(msg, self._event_list)
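
For reference, a hedged sketch of the configuration shape this version of populate_file_list appears to expect, reconstructed from the keys it reads; the case name and the template values are placeholders, not values from the source:

config = {
    'global': {
        'project_path': '/tmp/project',
        'local_globus_uuid': '<local-endpoint-uuid>',
    },
    'simulations': {
        'start_year': '1',
        'end_year': '10',
        'comparisons': {},
        'case_01': {
            'data_types': ['all'],
            'transfer_type': 'globus',
            'remote_uuid': '<remote-endpoint-uuid>',
            'remote_hostname': '',
        },
    },
    'data_types': {
        'atm': {
            'monthly': 'True',
            # templates consumed by render_file_string (placeholders)
            'local_path': '<local path template>',
            'file_format': '<file name template>',
            'remote_path': '<remote path template>',
        },
    },
}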
Example 33
    def update_remote_status(self, client):
        """
        Check the remote location for existence of the files on our list
        If they exist, update their status in the DB

        Parameters:
            client (globus_sdk.client): the globus client to use for remote query
        """
        result = client.endpoint_autoactivate(self.remote_endpoint,
                                              if_expires_in=2880)
        if result['code'] == "AutoActivationFailed":
            return False
        if self.sta:
            for _type in self.types:
                if _type == 'rest':
                    if not self.updated_rest:
                        self.mutex.acquire()
                        name, path, size = self.update_remote_rest_sta_path(
                            client)
                        DataFile.update(
                            remote_status=filestatus['EXISTS'],
                            remote_size=size,
                            remote_path=path,
                            name=name).where(
                                DataFile.datatype == 'rest').execute()
                        if self.mutex.locked():
                            self.mutex.release()
                        self.updated_rest = True
                    continue
                elif _type in [
                        'streams.ocean', 'streams.cice', 'mpas-o_in',
                        'mpas-cice_in'
                ]:
                    remote_path = os.path.join(self.remote_path, 'run')
                elif _type == 'meridionalHeatTransport':
                    remote_path = os.path.join(self.remote_path, 'archive',
                                               'ocn', 'hist')
                else:
                    remote_path = os.path.join(self.remote_path, 'archive',
                                               _type, 'hist')
                print('Querying globus for {}'.format(_type))
                res = self._get_ls(client=client, path=remote_path)

                self.mutex.acquire()
                try:
                    names = [
                        x.name for x in DataFile.select().where(
                            DataFile.datatype == _type)
                    ]
                    to_update_name = [
                        x['name'] for x in res if x['name'] in names
                    ]
                    to_update_size = [
                        x['size'] for x in res if x['name'] in names
                    ]
                    for fname, fsize in zip(to_update_name, to_update_size):
                        DataFile.update(
                            remote_status=filestatus['EXISTS'],
                            remote_size=fsize).where(
                                (DataFile.name == fname)
                                & (DataFile.datatype == _type)).execute()
                except Exception as e:
                    print_debug(e)
                    print "Do you have the correct start and end dates?"
                finally:
                    if self.mutex.locked():
                        self.mutex.release()
        else:

            remote_path = self.remote_path
            res = self._get_ls(client=client, path=remote_path)
            self.mutex.acquire()
            try:
                for _type in self.types:
                    names = [
                        x.name for x in DataFile.select().where(
                            DataFile.datatype == _type)
                    ]
                    to_update_name = [
                        x['name'] for x in res if x['name'] in names
                    ]
                    to_update_size = [
                        x['size'] for x in res if x['name'] in names
                    ]

                    n = 0
                    for fname, fsize in zip(to_update_name, to_update_size):
                        n += DataFile.update(
                            remote_status=filestatus['EXISTS'],
                            remote_size=fsize).where(
                                (DataFile.name == fname)
                                & (DataFile.datatype == _type)).execute()
                    print('updated {} records'.format(n))
            except Exception as e:
                print_debug(e)
            finally:
                if self.mutex.locked():
                    self.mutex.release()
Example 34
    def transfer_needed(self, event_list, event, remote_endpoint, ui,
                        display_event, emailaddr, thread_list):
        """
        Start a transfer job for any files that arent local, but do exist remotely

        Globus user must already be logged in

        Parameters:
            event_list (EventList): the list to push information into
            event (threadding.event): the thread event to trigger a cancel
        """
        if self.active_transfers >= 2:
            return False
        # required files don't exist locally, do exist remotely,
        # or exist locally with a different local and remote size
        self.mutex.acquire()
        try:
            required_files = [
                x for x in DataFile.select().where(
                    (DataFile.remote_status == filestatus['EXISTS'])
                    & (DataFile.local_status != filestatus['IN_TRANSIT'])
                    & ((DataFile.local_status == filestatus['NOT_EXIST'])
                       | (DataFile.local_size != DataFile.remote_size)))
            ]
            if len(required_files) == 0:
                return False
            target_files = []
            target_size = 1e11  # 100 GB
            total_size = 0
            for file in required_files:
                if total_size + file.remote_size < target_size:
                    target_files.append({
                        'name': file.name,
                        'local_size': file.local_size,
                        'local_path': file.local_path,
                        'local_status': file.local_status,
                        'remote_size': file.remote_size,
                        'remote_path': file.remote_path,
                        'remote_status': file.remote_status
                    })
                    total_size += file.remote_size
                else:
                    break
        except Exception as e:
            print_debug(e)
            return False
        finally:
            if self.mutex.locked():
                self.mutex.release()

        logging.info('Transferring required files')
        print('total transfer size {size} gigabytes for {nfiles} files'.format(
            size=(total_size / 1e9), nfiles=len(target_files)))
        transfer_config = {
            'file_list': target_files,
            'source_endpoint': self.remote_endpoint,
            'destination_endpoint': self.local_endpoint,
            'source_path': self.remote_path,
            'destination_path': self.local_path,
            'source_email': emailaddr,
            'display_event': display_event,
            'ui': ui,
        }
        transfer = Transfer(config=transfer_config, event_list=event_list)
        print('starting transfer for:')
        transfer_names = [x['name'] for x in transfer.file_list]
        for file in transfer.file_list:
            print('   ' + file['name'])
            logging.info(file['name'])
        self.mutex.acquire()
        try:
            DataFile.update(local_status=filestatus['IN_TRANSIT']).where(
                DataFile.name << transfer_names).execute()
            print('following files are in transit')
            for df in DataFile.select():
                if df.local_status == filestatus['IN_TRANSIT']:
                    print('   ' + df.name)
        except Exception as e:
            print_debug(e)
            return False
        finally:
            if self.mutex.locked():
                self.mutex.release()

        args = (transfer, event, event_list)
        thread = threading.Thread(target=self._handle_transfer,
                                  name='filemanager_transfer',
                                  args=args)
        thread_list.append(thread)
        thread.start()
        return True
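
The selection loop above greedily fills a transfer batch up to the 100 GB target. A self-contained sketch of just that accumulation step, with made-up file names and sizes:

files = [('a.nc', 4e10), ('b.nc', 5e10), ('c.nc', 3e10)]
target_size = 1e11  # 100 GB
batch, total_size = [], 0
for name, size in files:
    if total_size + size < target_size:
        batch.append(name)
        total_size += size
    else:
        break  # the next file would push the batch over the target

print(batch, total_size / 1e9, 'GB')  # ['a.nc', 'b.nc'] 90.0 GB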
Example 35
def write_human_state(event_list, job_sets, mutex, state_path='run_state.txt', print_file_list=False):
    """
    Writes out a human readable representation of the current execution state

    Parameters:
        event_list (EventList): the global list of all events
        job_sets (list of YearSet): the global list of all YearSets
        mutex (threading.Lock): the mutex for accessing the database
        state_path (str): the path to where to write the run_state
        print_file_list (bool): whether to also write out file_list.txt
    """
    try:
        with open(state_path, 'w') as outfile:
            line = "Execution state as of {0}\n".format(
                datetime.now().strftime('%d, %b %Y %I:%M'))
            out_str = line
            out_str += 'Running under process {0}\n\n'.format(os.getpid())

            for year_set in job_sets:
                line = 'Year_set {num}: {start} - {end}\n'.format(
                    num=year_set.set_number,
                    start=year_set.set_start_year,
                    end=year_set.set_end_year)
                out_str += line

                line = 'status: {status}\n'.format(
                    status=year_set.status)
                out_str += line

                for job in year_set.jobs:
                    line = '  >   {type} -- {id}: {status}\n'.format(
                        type=job.type,
                        id=job.job_id,
                        status=job.status)
                    out_str += line

                out_str += '\n'

            out_str += '\n'
            for line in event_list.list:
                if 'Transfer' in line.message:
                    continue
                if 'hosted' in line.message:
                    continue
                out_str += line.message + '\n'

            for line in event_list.list:
                if 'Transfer' not in line.message:
                    continue
                out_str += line.message + '\n'

            for line in event_list.list:
                if 'hosted' not in line.message:
                    continue
                out_str += line.message + '\n'
            outfile.write(out_str)
    except Exception as e:
        logging.error(format_debug(e))
        return

    if print_file_list:
        head, _ = os.path.split(state_path)
        file_list_path = os.path.join(head, 'file_list.txt')
        if not os.path.exists(head):
            os.makedirs(head)
        with open(file_list_path, 'w') as fp:
            mutex.acquire()
            types = [x.datatype for x in DataFile.select(
                DataFile.datatype).distinct()]
            try:
                for _type in types:
                    fp.write('===================================\n')
                    fp.write(_type + ':\n')
                    datafiles = DataFile.select().where(DataFile.datatype == _type)
                    for datafile in datafiles:

                        filestr = '------------------------------------------'
                        filestr += '\n     name: ' + datafile.name + '\n     local_status: '
                        if datafile.local_status == 0:
                            filestr += ' present, '
                        elif datafile.local_status == 1:
                            filestr += ' missing, '
                        else:
                            filestr += ' in transit, '
                        filestr += '\n     remote_status: '
                        if datafile.remote_status == 0:
                            filestr += ' present'
                        elif datafile.remote_status == 1:
                            filestr += ' missing'
                        else:
                            filestr += ' in transit'
                        filestr += '\n     local_size: ' + \
                            str(datafile.local_size)
                        filestr += '\n     local_path: ' + datafile.local_path
                        filestr += '\n     remote_size: ' + \
                            str(datafile.remote_size)
                        filestr += '\n     remote_path: ' + datafile.remote_path + '\n'
                        fp.write(filestr)
            except Exception as e:
                print_debug(e)
            finally:
                if mutex.locked():
                    mutex.release()
    #     """
    #     for string, val in kwargs.items():
    #         if string in instring:
    #             instring = instring.replace(string, val)
    #     return instring

    def check_data_ready(self,
                         data_required,
                         case,
                         start_year=None,
                         end_year=None):
        self._mutex.acquire()
        try:
            for datatype in data_required:
                if start_year and end_year:
                    q = (DataFile.select().where(
                        (DataFile.year >= start_year)
                        & (DataFile.year <= end_year) & (DataFile.case == case)
                        & (DataFile.datatype == datatype)))
                else:
                    q = (DataFile.select().where((DataFile.case == case) & (
                        DataFile.datatype == datatype)))
                datafiles = q.execute()
                for df in datafiles:
                    if (not os.path.exists(df.local_path)
                            and df.local_status == FileStatus.PRESENT.value):
                        df.local_status = FileStatus.NOT_PRESENT.value
                        df.save()
                    elif (os.path.exists(df.local_path)
                          and df.local_status == FileStatus.NOT_PRESENT.value):
                        df.local_status = FileStatus.PRESENT.value
                        df.save()
        finally:
            if self._mutex.locked():
                self._mutex.release()
    def transfer_needed(self, event_list, event):
        """
        Start a transfer job for any files that arent local, but do exist remotely

        Globus user must already be logged in
        """

        # required files don't exist locally, do exist remotely,
        # or exist locally with a different local and remote size
        target_files = list()
        self._mutex.acquire()
        try:
            q = (DataFile.select(DataFile.case).where(
                DataFile.local_status == FileStatus.NOT_PRESENT.value))
            caselist = [x.case for x in q.execute()]
            if not caselist:
                return
            cases = list()
            for case in caselist:
                if case not in cases:
                    cases.append(case)

            for case in cases:
                q = (DataFile.select().where((DataFile.case == case) & (
                    DataFile.local_status == FileStatus.NOT_PRESENT.value)))
                # filter out files marked local; they cannot be transferred
                required_files = [x for x in q.execute()
                                  if x.transfer_type != 'local']
                if not required_files:
                    msg = 'ERROR: all missing files are marked as local'
                    print_line(msg, self._event_list)
                    return
                # mark files as in-transit so we don't double-copy
                q = (DataFile.update(
                    {DataFile.local_status: FileStatus.IN_TRANSIT.value}
                ).where(DataFile.name << [x.name for x in required_files]))
                q.execute()

                for file in required_files:
                    target_files.append({
                        'local_path': file.local_path,
                        'remote_path': file.remote_path,
                    })

                if required_files[0].transfer_type == 'globus':
                    msg = 'Starting globus file transfer of {} files'.format(
                        len(required_files))
                    print_line(msg, self._event_list)
                    msg = 'See https://www.globus.org/app/activity for transfer details'
                    print_line(msg, self._event_list)

                    client = get_client()
                    remote_uuid = required_files[0].remote_uuid
                    local_uuid = self._config['global']['local_globus_uuid']
                    thread_name = '{}_globus_transfer'.format(
                        required_files[0].case)
                    _args = (client, remote_uuid, local_uuid, target_files,
                             self.kill_event)
                    thread = Thread(target=globus_transfer,
                                    name=thread_name,
                                    args=_args)
                    self.thread_list.append(thread)
                    thread.start()
                elif required_files[0].transfer_type == 'sftp':
                    msg = 'Starting sftp file transfer of {} files'.format(
                        len(required_files))
                    print_line(msg, self._event_list)

                    client = get_ssh_client(required_files[0].remote_hostname)
                    thread_name = '{}_sftp_transfer'.format(
                        required_files[0].case)
                    _args = (target_files, client, self.kill_event)
                    thread = Thread(target=self._ssh_transfer,
                                    name=thread_name,
                                    args=_args)
                    self.thread_list.append(thread)
                    thread.start()
        except Exception as e:
            print_debug(e)
            return False
        finally:
            if self._mutex.locked():
                self._mutex.release()
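
The DataFile.name << [...] expression used above is peewee's IN operator. A self-contained sketch of that bulk-update-by-name pattern, with an illustrative model rather than the processflow schema:

from peewee import SqliteDatabase, Model, CharField, IntegerField

db = SqliteDatabase(':memory:')

class File(Model):
    name = CharField()
    local_status = IntegerField(default=1)  # 1: not present

    class Meta:
        database = db

db.connect()
db.create_tables([File])
for n in ('a.nc', 'b.nc', 'c.nc'):
    File.create(name=n)

IN_TRANSIT = 2
updated = (File.update(local_status=IN_TRANSIT)
               .where(File.name << ['a.nc', 'b.nc'])
               .execute())
print(updated)  # 2 rows marked in transit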