Ejemplo n.º 1
0
    def swestore(self):
        """Entry point for packaging runs and sending them to swestore.

        Depending on the parsed arguments (``self.pargs``) the following
        steps are performed, in this order:

        * ``package_run``: build a tarball from the run folder. Requires the
          ``flowcell`` argument. With ``clean`` the run is removed from the
          archive root; with ``clean_from_staging`` it is removed from the
          staging area, but only if its storage status is ``NAS_nosync``.
        * ``remote_upload``: upload the tarball to a remote host.
        * ``send_to_swestore``: send the tarball to swestore and flag the run
          as ``swestore_archived`` in the database.

        The method returns ``None`` early (after logging an error) when a
        required argument is missing, no tarball was created, the tarball
        does not exist, or the remote upload fails.

        Raises:
            SystemExit: with status 1 when sending to swestore fails, so that
                a remote caller can detect the failure.
            NotImplementedError: when ``log_to_db`` is set (raised after the
                archiving step has already been carried out).
        """
        db_info = self.app.config.get_section_dict('db')
        f_conn = FlowcellRunMetricsConnection(username=db_info.get('user'),
                                              password=db_info.get('password'),
                                              url=db_info.get('url'))

        def _task_kwargs():
            # Settings of the 'archive' config section, overridden by the
            # command line arguments. Copy + update instead of concatenating
            # the .items() results, which only works where items() is a list.
            kwargs = dict(self.config.get_section_dict('archive'))
            kwargs.update(vars(self.pargs))
            return kwargs

        # Create a tarball out of the run folder
        if self.pargs.package_run:

            # We require a flowcell argument
            if not self._check_pargs(["flowcell"]):
                return

            self.pargs.tarball = package_run(self, self.config.get('archive', 'swestore_staging'), **vars(self.pargs))
            if not self.pargs.tarball:
                self.log.error("No tarball was created, exiting")
                return
            if self.pargs.clean:
                rm_run(self, self.config.get('archive', 'root'), flowcell=self.pargs.flowcell)

            if self.pargs.clean_from_staging:
                # Check that the run has been archived on the NAS before
                # removing it, otherwise it will keep synching
                if self.pargs.flowcell in f_conn.get_storage_status('NAS_nosync').keys():
                    rm_run(self, self.config.get('archive', 'swestore_staging'), flowcell=self.pargs.flowcell)
                else:
                    self.log.warn("Run storage status is not NAS_nosync, not removing run from swestore_stage!")

        if not self.pargs.tarball:
            self.log.error("Required argument --tarball was not specified")
            return

        if not os.path.exists(self.pargs.tarball):
            self.log.error("Tarball {} does not exist".format(self.pargs.tarball))
            return

        # Upload a tarball to a remote host
        if self.pargs.remote_upload:
            result = upload_tarball(self, **_task_kwargs())
            if not result:
                return
            if self.pargs.clean:
                rm_tarball(self, tarball=self.pargs.tarball)

        # Send the tarball to Swestore using irods
        if self.pargs.send_to_swestore:
            result = send_to_swestore(self, **_task_kwargs())
            if not result:
                # If archiving failed, we need to give a non-zero exit code in
                # order for a remote instance to detect the failure
                sys.exit(1)
            if self.pargs.clean:
                rm_tarball(self, tarball=self.pargs.tarball)

            # Set the run as archived in StatusDB. When only a tarball was
            # given, derive the flowcell id from the tarball file name.
            # NOTE(review): assumes the tarball is named "<flowcell>.<ext>" --
            # confirm against package_run.
            fc_id = self.pargs.flowcell
            if not fc_id:
                fc_id = os.path.basename(self.pargs.tarball).split('.')[0]
            fc_db_id = f_conn.id_view.get(fc_id)
            if fc_db_id:
                f_conn.set_storage_status(fc_db_id, 'swestore_archived')
            else:
                # Do not write a status for an unknown (None) document id
                self.log.warn("Flowcell {} not found in the database, not changing status.".format(fc_id))

            # Log to statusdb
            if self.pargs.log_to_db:
                # implement this
                raise NotImplementedError("logging to db functionality not implemented")
Ejemplo n.º 2
0
    def storage_cleanup(self):
        """Move finished runs to ``nosync`` and remove runs older than 30 days.

        The short host name of the current machine is looked up in the
        ``storage`` config section; its value is a comma separated list of
        directories to clean. If the host is not configured, a warning is
        logged and nothing is done.

        On hosts whose name contains ``nas``:

        * runs containing ``RTAComplete.txt`` are moved to ``nosync`` once the
          last line of the lsyncd log contains ``Finished``, and their storage
          status is set to ``NAS_nosync`` in the database;
        * runs that have been in ``nosync`` for 30 days or more are deleted,
          but only if their storage status is ``swestore_archived``.

        On all other hosts:

        * runs containing ``second_read_processing_completed.txt`` are moved
          to ``nosync``;
        * runs that have been in ``nosync`` for 30 days or more are deleted.

        A run containing ``no_remove.txt`` is never deleted. The age of a run
        is taken from the modification time of its ``RTAComplete.txt``.
        """
        storage_conf = self.app.config.get_section_dict('storage')
        db_info = self.app.config.get_section_dict('db')
        f_conn = FlowcellRunMetricsConnection(username=db_info.get('user'),
                                              password=db_info.get('password'),
                                              url=db_info.get('url'))
        server = platform.node().split('.')[0].lower()
        if server not in storage_conf:
            self.app.log.warn("You're running the cleanup functionality in {}. But this "
                              "server doen't seem to be on your pm.conf file. Are you on the correct server?".format(server))
            return

        self.app.log.info("Performing cleanup on production server \"{}\"...".format(server))
        dirs = [d.lstrip() for d in storage_conf.get(server).split(',')]

        # Collect old runs (>= 30 days in nosync folder) to remove. This is
        # done before moving anything so freshly moved runs are not considered.
        old_runs = []
        for d in dirs:
            nosync_dir = os.path.join(d, 'nosync')
            for fc in glob.iglob(os.path.join(nosync_dir, '1*')):
                if not os.path.isdir(fc):
                    continue
                fc_name = os.path.basename(fc)
                # A no_remove.txt file indicates that the run must be kept
                if os.path.exists(os.path.join(fc, 'no_remove.txt')):
                    self.app.log.warn("no_remove.txt file found in {}, skipping run".format(fc_name))
                    continue
                try:
                    stats = os.stat(os.path.join(fc, 'RTAComplete.txt'))
                except OSError:
                    # Without the file the age is unknown; keep the run and
                    # carry on instead of aborting the whole cleanup.
                    self.app.log.warn("No RTAComplete.txt file was found for run {}, "
                                      "cannot determine its age. Skipping run".format(fc_name))
                    continue
                mod_time = datetime.now() - datetime.fromtimestamp(stats.st_mtime)
                if mod_time.days >= 30:
                    old_runs.append(fc)

        # NAS servers
        if 'nas' in server:
            # Collect newly finished runs
            fc_list = []
            for d in dirs:
                for fc in glob.glob(os.path.join(d, '1*')):
                    if os.path.exists(os.path.join(fc, 'RTAComplete.txt')):
                        fc_list.append(fc)

            # Move to nosync
            for fc in fc_list:
                fc_name = os.path.basename(fc)
                # Every run gets its own retry budget; a shared counter would
                # make all remaining runs be skipped once it reaches zero.
                retries = 5
                while retries:
                    if 'Finished' in last_lines(storage_conf.get('lsyncd_log'), 1)[0]:
                        break
                    retries -= 1
                    time.sleep(3)
                if not retries:
                    self.app.log.warn("lsyncd process doesn't seem to be finished. "
                                      "Skipping run {}".format(os.path.basename(fc)))
                    continue

                self.app.log.info("lsyncd process seems to be up to speed, and run {} "
                                  "is finished, moving it to nosync".format(fc_name))
                nosync_dir = os.path.join(os.path.dirname(fc), 'nosync')
                shutil.move(fc, nosync_dir)
                # Touch RTAComplete.txt so that its modification date is the
                # date when the run was moved to nosync. os.utime only updates
                # the timestamps; opening the file with 'w' would truncate it.
                try:
                    os.utime(os.path.join(nosync_dir, fc_name, 'RTAComplete.txt'), None)
                except (IOError, OSError):
                    self.app.log.warn("No RTAComplete.txt file was found for run {}."
                                      " Please check".format(os.path.basename(fc_name)))
                fc_db_id = f_conn.id_view.get(fc_name)
                if fc_db_id:
                    f_conn.set_storage_status(fc_db_id, 'NAS_nosync')
                else:
                    self.app.log.warn("Flowcell {} not found in the database, not changing status.".format(fc_name))

            # Remove old runs
            for fc in old_runs:
                fc_name = os.path.basename(fc)
                # Check that the run has been archived in swestore before
                # removing permanently
                if fc_name in f_conn.get_storage_status('swestore_archived').keys():
                    self.app.log.info("Run {} has been in nosync for more than 30 days "
                                      "and is archived in swestore. Permanently removing it from the NAS".format(fc_name))
                    shutil.rmtree(fc)
                else:
                    self.app.log.warn("Run {} has been in nosync for more than 30 "
                                      "days, but has not yet been archived in swestore. "
                                      "Not removing, please check it".format(fc_name))

        # Processing servers (b5)
        else:
            # Collect finished runs
            fc_list = []
            for d in dirs:
                for fc in glob.glob(os.path.join(d, '1*')):
                    if os.path.exists(os.path.join(fc, 'second_read_processing_completed.txt')):
                        fc_list.append(fc)

            # Move to nosync
            for fc in fc_list:
                fc_name = os.path.basename(fc)
                self.app.log.info("Moving run {} to nosync".format(fc_name))
                shutil.move(fc, os.path.join(os.path.dirname(fc), 'nosync'))

            # Remove old runs
            for fc in old_runs:
                fc_name = os.path.basename(fc)
                self.app.log.info("Run {} has been in nosync for more than 30 "
                                  "days, permanently removing it from {}".format(fc_name, server))
                shutil.rmtree(fc)
Ejemplo n.º 3
0
    def storage_cleanup(self):
        """Move finished runs to ``nosync`` and remove runs older than 30 days.

        The short host name of the current machine is looked up in the
        ``storage`` config section; its value is a comma separated list of
        directories to clean. If the host is not configured, a warning is
        logged and nothing is done.

        On hosts whose name contains ``nas``:

        * runs containing ``RTAComplete.txt`` are moved to ``nosync`` once the
          last line of the lsyncd log contains ``Finished``, and their storage
          status is set to ``NAS_nosync`` in the database;
        * runs that have been in ``nosync`` for 30 days or more are deleted,
          but only if their storage status is ``swestore_archived``.

        On all other hosts:

        * runs containing ``second_read_processing_completed.txt`` are moved
          to ``nosync``;
        * runs that have been in ``nosync`` for 30 days or more are deleted.

        A run containing ``no_remove.txt`` is never deleted. The age of a run
        is taken from the modification time of its ``RTAComplete.txt``.
        """
        storage_conf = self.app.config.get_section_dict('storage')
        db_info = self.app.config.get_section_dict('db')
        f_conn = FlowcellRunMetricsConnection(username=db_info.get('user'),
                                              password=db_info.get('password'),
                                              url=db_info.get('url'))
        server = platform.node().split('.')[0].lower()
        if server not in storage_conf:
            self.app.log.warn(
                "You're running the cleanup functionality in {}. But this "
                "server doen't seem to be on your pm.conf file. Are you on the correct server?"
                .format(server))
            return

        self.app.log.info(
            "Performing cleanup on production server \"{}\"...".format(
                server))
        dirs = [d.lstrip() for d in storage_conf.get(server).split(',')]

        # Collect old runs (>= 30 days in nosync folder) to remove. This is
        # done before moving anything so freshly moved runs are not considered.
        old_runs = []
        for d in dirs:
            nosync_dir = os.path.join(d, 'nosync')
            for fc in glob.iglob(os.path.join(nosync_dir, '1*')):
                if not os.path.isdir(fc):
                    continue
                fc_name = os.path.basename(fc)
                # A no_remove.txt file indicates that the run must be kept
                if os.path.exists(os.path.join(fc, 'no_remove.txt')):
                    self.app.log.warn(
                        "no_remove.txt file found in {}, skipping run".
                        format(fc_name))
                    continue
                try:
                    stats = os.stat(os.path.join(fc, 'RTAComplete.txt'))
                except OSError:
                    # Without the file the age is unknown; keep the run and
                    # carry on instead of aborting the whole cleanup.
                    self.app.log.warn(
                        "No RTAComplete.txt file was found for run {}, "
                        "cannot determine its age. Skipping run"
                        .format(fc_name))
                    continue
                mod_time = datetime.now() - datetime.fromtimestamp(
                    stats.st_mtime)
                if mod_time.days >= 30:
                    old_runs.append(fc)

        # NAS servers
        if 'nas' in server:
            # Collect newly finished runs
            fc_list = []
            for d in dirs:
                for fc in glob.glob(os.path.join(d, '1*')):
                    if os.path.exists(os.path.join(fc, 'RTAComplete.txt')):
                        fc_list.append(fc)

            # Move to nosync
            for fc in fc_list:
                fc_name = os.path.basename(fc)
                # Every run gets its own retry budget; a shared counter would
                # make all remaining runs be skipped once it reaches zero.
                retries = 5
                while retries:
                    if 'Finished' in last_lines(
                            storage_conf.get('lsyncd_log'), 1)[0]:
                        break
                    retries -= 1
                    time.sleep(3)
                if not retries:
                    self.app.log.warn(
                        "lsyncd process doesn't seem to be finished. "
                        "Skipping run {}".format(os.path.basename(fc)))
                    continue

                self.app.log.info(
                    "lsyncd process seems to be up to speed, and run {} "
                    "is finished, moving it to nosync".format(fc_name))
                nosync_dir = os.path.join(os.path.dirname(fc), 'nosync')
                shutil.move(fc, nosync_dir)
                # Touch RTAComplete.txt so that its modification date is the
                # date when the run was moved to nosync. os.utime only updates
                # the timestamps; opening the file with 'w' would truncate it.
                try:
                    os.utime(
                        os.path.join(nosync_dir, fc_name, 'RTAComplete.txt'),
                        None)
                except (IOError, OSError):
                    self.app.log.warn(
                        "No RTAComplete.txt file was found for run {}."
                        " Please check".format(os.path.basename(fc_name)))
                fc_db_id = f_conn.id_view.get(fc_name)
                if fc_db_id:
                    f_conn.set_storage_status(fc_db_id, 'NAS_nosync')
                else:
                    self.app.log.warn(
                        "Flowcell {} not found in the database, not changing status."
                        .format(fc_name))

            # Remove old runs
            for fc in old_runs:
                fc_name = os.path.basename(fc)
                # Check that the run has been archived in swestore before
                # removing permanently
                if fc_name in f_conn.get_storage_status(
                        'swestore_archived').keys():
                    self.app.log.info(
                        "Run {} has been in nosync for more than 30 days "
                        "and is archived in swestore. Permanently removing it from the NAS"
                        .format(fc_name))
                    shutil.rmtree(fc)
                else:
                    self.app.log.warn(
                        "Run {} has been in nosync for more than 30 "
                        "days, but has not yet been archived in swestore. "
                        "Not removing, please check it".format(fc_name))

        # Processing servers (b5)
        else:
            # Collect finished runs
            fc_list = []
            for d in dirs:
                for fc in glob.glob(os.path.join(d, '1*')):
                    if os.path.exists(
                            os.path.join(
                                fc,
                                'second_read_processing_completed.txt')):
                        fc_list.append(fc)

            # Move to nosync
            for fc in fc_list:
                fc_name = os.path.basename(fc)
                self.app.log.info(
                    "Moving run {} to nosync".format(fc_name))
                shutil.move(fc, os.path.join(os.path.dirname(fc),
                                             'nosync'))

            # Remove old runs
            for fc in old_runs:
                fc_name = os.path.basename(fc)
                self.app.log.info(
                    "Run {} has been in nosync for more than 30 "
                    "days, permanently removing it from {}".format(
                        fc_name, server))
                shutil.rmtree(fc)
Ejemplo n.º 4
0
    def swestore(self):
        """Entry point for packaging runs and sending them to swestore.

        The staging directory is resolved with ``get_path_swestore_staging``
        from the comma separated ``swestore_staging`` setting of the
        ``archive`` config section, using the tarball argument if given and
        the flowcell argument otherwise.

        Depending on the parsed arguments (``self.pargs``) the following
        steps are performed, in this order:

        * ``package_run``: build a tarball from the run folder. Requires the
          ``flowcell`` argument. With ``clean`` the run is removed from the
          archive root; with ``clean_from_staging`` it is removed from the
          staging directory, but only if its storage status is
          ``NAS_nosync``.
        * ``remote_upload``: upload the tarball to a remote host.
        * ``send_to_swestore``: send the tarball to swestore and flag the run
          as ``swestore_archived`` in the database, if it is found there.

        The method returns ``None`` early (after logging an error) when a
        required argument is missing, no tarball was created, the tarball
        does not exist in the staging directory, or the remote upload fails.

        Raises:
            SystemExit: with status 1 when sending to swestore fails, so that
                a remote caller can detect the failure.
            NotImplementedError: when ``log_to_db`` is set (raised after the
                archiving step has already been carried out).
        """
        db_info = self.app.config.get_section_dict('db')
        f_conn = FlowcellRunMetricsConnection(username=db_info.get('user'),
                                              password=db_info.get('password'),
                                              url=db_info.get('url'))
        swestore_paths = set(self.config.get('archive', 'swestore_staging').split(','))
        run = self.pargs.tarball if self.pargs.tarball else self.pargs.flowcell
        swestore_dir = get_path_swestore_staging(run, swestore_paths)

        def _task_kwargs():
            # Settings of the 'archive' config section, overridden by the
            # command line arguments. Copy + update instead of concatenating
            # the .items() results, which only works where items() is a list.
            kwargs = dict(self.config.get_section_dict('archive'))
            kwargs.update(vars(self.pargs))
            return kwargs

        # Create a tarball out of the run folder
        if self.pargs.package_run:

            # We require a flowcell argument
            if not self._check_pargs(["flowcell"]):
                return

            self.pargs.tarball = package_run(self, swestore_dir, **vars(self.pargs))
            if not self.pargs.tarball:
                self.log.error("No tarball was created, exiting")
                return
            if self.pargs.clean:
                rm_run(self, self.config.get('archive', 'root'), flowcell=self.pargs.flowcell)

            if self.pargs.clean_from_staging:
                # Check that the run has been archived on the NAS before
                # removing it, otherwise it will keep synching
                if self.pargs.flowcell in f_conn.get_storage_status('NAS_nosync').keys():
                    rm_run(self, swestore_dir, flowcell=self.pargs.flowcell)
                else:
                    self.log.warn("Run storage status is not NAS_nosync, not removing run from swestore_stage!")

        if not self.pargs.tarball:
            self.log.error("Required argument --tarball was not specified")
            return

        # os.path.join leaves an absolute tarball path untouched, so both a
        # bare file name and a full path are accepted here.
        # NOTE(review): the helpers below receive the tarball argument as
        # given, not the joined path -- confirm they resolve it the same way.
        if not os.path.exists(os.path.join(swestore_dir, self.pargs.tarball)):
            self.log.error("Tarball {} does not exist".format(self.pargs.tarball))
            return

        # Upload a tarball to a remote host
        if self.pargs.remote_upload:
            result = upload_tarball(self, **_task_kwargs())
            if not result:
                return
            if self.pargs.clean:
                rm_tarball(self, tarball=self.pargs.tarball)

        # Send the tarball to Swestore using irods
        if self.pargs.send_to_swestore:
            result = send_to_swestore(self, **_task_kwargs())
            if not result:
                # If archiving failed, we need to give a non-zero exit code in
                # order for a remote instance to detect the failure
                sys.exit(1)
            if self.pargs.clean:
                rm_tarball(self, tarball=self.pargs.tarball)

            # Set the run as archived in StatusDB. Without a flowcell
            # argument the id is taken from the tarball file name; strip any
            # directory part first so a tarball given as a path (possibly with
            # dots in its directories) still yields the flowcell id.
            fc_id = self.pargs.flowcell
            if not fc_id:
                fc_id = os.path.basename(self.pargs.tarball).split('.')[0]
            fc_db_id = f_conn.id_view.get(fc_id)
            if fc_db_id:
                f_conn.set_storage_status(fc_db_id, 'swestore_archived')
            else:
                self.log.warn("Flowcell {} not found in the database, not changing status.".format(fc_id))

            # Log to statusdb
            if self.pargs.log_to_db:
                # implement this
                raise NotImplementedError("logging to db functionality not implemented")