예제 #1
0
파일: bank.py 프로젝트: hexylena/biomaj
    def __init__(self, name, options=None, no_log=False):
        """
        Load the bank record called *name* from the database, inserting a
        new empty record when none is found.

        :param name: name of the bank, must match its config file
        :type name: str
        :param options: bank options
        :type options: argparse
        :param no_log: when true, ``no_log`` is also set on the options
                       passed to the bank configuration
        :type no_log: bool
        :raises Exception: if the global configuration is not loaded
        """
        logging.debug('Initialize ' + name)
        if BiomajConfig.global_config is None:
            raise Exception('Configuration must be loaded first')

        self.name = name
        self.depends = []
        self.no_log = no_log

        if no_log:
            # Carry the flag through the options object, creating one on demand
            if options is not None:
                options.no_log = no_log
            else:
                options = Options()
                options.no_log = True

        self.config = BiomajConfig(self.name, options)

        thread_count = self.config.get('bank.num.threads')
        if thread_count is not None:
            ProcessFactory.NB_THREAD = int(thread_count)

        log_file = self.config.log_file
        if log_file is not None and log_file != 'none':
            logging.info("Log file: " + log_file)

        self.options = Options() if options is None else options

        if MongoConnector.db is None:
            global_config = BiomajConfig.global_config
            MongoConnector(global_config.get('GENERAL', 'db.url'),
                           global_config.get('GENERAL', 'db.name'))

        self.banks = MongoConnector.banks
        self.bank = self.banks.find_one({'name': self.name})

        if self.bank is None:
            # Unknown bank: register a new record with no session or release
            self.bank = dict(
                name=self.name,
                current=None,
                sessions=[],
                production=[],
                properties=self.get_properties()
            )
            self.bank['_id'] = self.banks.insert(self.bank)

        self.session = None
        self.use_last_session = False
예제 #2
0
  def setUp(self):
    self.utils = UtilsForTest()

    self.curdir = os.path.dirname(os.path.realpath(__file__))
    self.examples = os.path.join(self.curdir,'bank') + '/'

    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

    '''
예제 #3
0
    def setUp(self):
        self.utils = UtilsForTest()

        self.curdir = os.path.dirname(os.path.realpath(__file__))
        self.examples = os.path.join(self.curdir, 'bank') + '/'

        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        '''
예제 #4
0
class TestElastic(unittest.TestCase):
    """
    Test indexing and search
    """

    def setUp(self):
        """
        Load the test configuration, skip when indexing is disabled, then
        delete all bank records and any leftover 'local' lock file.
        """
        BmajIndex.es = None
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        if BmajIndex.do_index == False:
            self.skipTest("Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank("local")
        b.banks.remove({})

        self.config = BiomajConfig("local")
        self._remove_lock_file()

    def tearDown(self):
        """
        Remove the lock file, clean the test utilities and drop what was
        indexed for the 'test' bank.
        """
        self._remove_lock_file()
        self.utils.clean()
        BmajIndex.delete_all_bank("test")

    def _remove_lock_file(self):
        """Delete ``local.lock`` from the configured data directory if it exists."""
        lock_file = os.path.join(self.config.get("data.dir"), "local.lock")
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def test_index(self):
        """Index one production release and expect two hits for its bank."""
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [
                    {"files": ["fasta/chr1.fa", "fasta/chr2.fa"], "types": ["nucleic"], "tags": {"organism": "hg19"}}
                ],
                "blast": [
                    {"files": ["blast/chr1/chr1db"], "types": ["nucleic"], "tags": {"chr": "chr1", "organism": "hg19"}}
                ],
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"],
        }
        BmajIndex.add("test", prod, True)
        query = {"query": {"match": {"bank": "test"}}}
        res = BmajIndex.search(query)
        # assertEqual reports the actual hit count when the check fails
        self.assertEqual(len(res), 2)
예제 #5
0
  def setUp(self):
    """
    Load the test configuration, delete all bank records and remove any
    leftover 'local' lock file.
    """
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

    # Delete all banks
    b = Bank('local')
    b.banks.remove({})

    self.config = BiomajConfig('local')
    lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
    if os.path.exists(lock_file):
      os.remove(lock_file)
예제 #6
0
    def setUp(self):
        """
        Load the test configuration, delete all bank records and remove any
        leftover 'local' lock file.
        """
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)

        # Delete all banks
        b = Bank('local')
        b.banks.remove({})

        self.config = BiomajConfig('local')
        lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
예제 #7
0
    def setUp(self):
        """
        Load the test configuration, skip when indexing is disabled, then
        delete all bank records and any leftover 'local' lock file.
        """
        BmajIndex.es = None
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        if BmajIndex.do_index == False:
            self.skipTest("Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank("local")
        b.banks.remove({})

        self.config = BiomajConfig("local")
        lock_file = os.path.join(self.config.get("data.dir"), "local.lock")
        if os.path.exists(lock_file):
            os.remove(lock_file)
예제 #8
0
    def __init__(self):
        """
        Read the database settings from the global configuration and store
        them as class attributes on ``Connector`` (``url``, ``db``,
        ``driver``).

        :raises Exception: if ``db.url`` or ``db.name`` is not set, or if
                           nothing precedes the first ':' of the url
        """
        # Load the global properties on demand so the settings can be read
        if not BiomajConfig.global_config:
            BiomajConfig.load_config()
        url = BiomajConfig.global_config.get('GENERAL', 'db.url')
        db = BiomajConfig.global_config.get('GENERAL', 'db.name')
        if url is None:
            raise Exception("No connection url set!")
        if db is None:
            raise Exception("No connection db set!")
        # The driver is whatever precedes the first ':' of the url.
        # str.split is used instead of the module-level split(url, ':'),
        # which only exists in the Python 2 ``string`` module.
        driver = url.split(':')[0]
        if not driver:
            raise Exception("Can't determine database driver")

        Connector.url = url
        Connector.db = db
        Connector.driver = driver
예제 #9
0
class TestBiomajHTTPDownload(unittest.TestCase):
    """
    Test HTTP downloader
    """

    def setUp(self):
        """Load the test configuration and the 'testhttp' bank configuration."""
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        self.config = BiomajConfig('testhttp')

    def tearDown(self):
        self.utils.clean()

    def test_http_list(self):
        """Listing the remote directory yields exactly one file."""
        httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/dists/',
                             self.config)
        # Close the downloader even when the remote call raises
        try:
            (file_list, dir_list) = httpd.list()
        finally:
            httpd.close()
        self.assertEqual(len(file_list), 1)

    def test_http_list_dateregexp(self):
        """Same listing with a custom date format set in the configuration."""
        self.config.set('http.parse.file.date.format', "%%d-%%b-%%Y %%H:%%M")
        httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/dists/',
                             self.config)
        try:
            (file_list, dir_list) = httpd.list()
        finally:
            httpd.close()
        self.assertEqual(len(file_list), 1)

    def test_http_download(self):
        """Matching ``README`` selects and downloads exactly one file."""
        httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/dists/',
                             self.config)
        try:
            (file_list, dir_list) = httpd.list()
            httpd.match([r'^README$'], file_list, dir_list)
            httpd.download(self.utils.data_dir)
        finally:
            httpd.close()
        self.assertEqual(len(httpd.files_to_download), 1)

    def test_http_download_in_subdir(self):
        """Matching ``dists/README`` from the parent directory selects one file."""
        httpd = HTTPDownload('http', 'ftp2.fr.debian.org', '/debian/',
                             self.config)
        try:
            (file_list, dir_list) = httpd.list()
            httpd.match([r'^dists/README$'], file_list, dir_list)
            httpd.download(self.utils.data_dir)
        finally:
            httpd.close()
        self.assertEqual(len(httpd.files_to_download), 1)
예제 #10
0
    def setUp(self):
        """
        Load the test configuration, skip when indexing is disabled, then
        delete all bank records, the indexed entries of the 'local' bank and
        any leftover 'local' lock file.
        """
        BmajIndex.es = None
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        if BmajIndex.do_index == False:
            self.skipTest(
                "Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        BmajIndex.delete_all_bank('local')

        self.config = BiomajConfig('local')
        lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
예제 #11
0
class TestElastic(unittest.TestCase):
    '''
    test indexing and search
    '''

    def setUp(self):
        '''
        Load the test configuration, skip when indexing is disabled, then
        delete all bank records, the indexed entries of the 'local' bank and
        any leftover 'local' lock file.
        '''
        BmajIndex.es = None
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        if BmajIndex.do_index == False:
            self.skipTest(
                "Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        BmajIndex.delete_all_bank('local')

        self.config = BiomajConfig('local')
        self._remove_lock_file()

    def tearDown(self):
        '''
        Remove the lock file, clean the test utilities and drop what was
        indexed for the 'test' bank.
        '''
        self._remove_lock_file()
        self.utils.clean()
        BmajIndex.delete_all_bank('test')

    def _remove_lock_file(self):
        '''
        Delete ``local.lock`` from the configured data directory if it exists
        '''
        lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def test_index(self):
        '''
        Index one production release and expect two hits for its bank
        '''
        BmajIndex.do_index = True
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [{
                    "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
                    "types": ["nucleic"],
                    "tags": {
                        "organism": "hg19"
                    }
                }],
                "blast": [{
                    "files": ["blast/chr1/chr1db"],
                    "types": ["nucleic"],
                    "tags": {
                        "chr": "chr1",
                        "organism": "hg19"
                    }
                }]
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"]
        }

        BmajIndex.add('test', prod, True)

        query = {'query': {'match': {'bank': 'test'}}}
        res = BmajIndex.search(query)
        # assertEqual reports the actual hit count when the check fails
        self.assertEqual(len(res), 2)

    def test_remove_all(self):
        '''
        After indexing, deleting the 'test' bank leaves no search hit
        '''
        self.test_index()
        query = {'query': {'match': {'bank': 'test'}}}
        BmajIndex.delete_all_bank('test')
        res = BmajIndex.search(query)
        self.assertEqual(len(res), 0)
예제 #12
0
class Bank(object):
    '''
    BioMAJ bank
    '''

    def __init__(self, name, options=None, no_log=False):
        '''
        Load the bank record called *name* through the connector, storing a
        new empty record when none is found.

        :param name: name of the bank, must match its config file
        :type name: str
        :param options: bank options
        :type options: argparse
        :param no_log: when true, ``no_log`` is also set on the options
                       passed to the bank configuration
        :type no_log: bool
        :raises Exception: if the global configuration is not loaded
        '''
        logging.debug('Initialize ' + name)
        if BiomajConfig.global_config is None:
            raise Exception('Configuration must be loaded first')

        self.name = name
        self.depends = []
        self.no_log = no_log

        if no_log:
            # Carry the flag through the options object, creating one on demand
            if options is not None:
                options.no_log = no_log
            else:
                options = Options()
                options.no_log = True

        self.config = BiomajConfig(self.name, options)

        thread_count = self.config.get('bank.num.threads')
        if thread_count is not None:
            ProcessFactory.NB_THREAD = int(thread_count)

        log_file = self.config.log_file
        if log_file is not None and log_file != 'none':
            logging.info("Log file: " + log_file)

        self.options = Options() if options is None else options

        # The connector object is also exposed as self.banks
        self.connector = Connector().get_connector()
        self.banks = self.connector
        self.bank = self.connector.get({'name': self.name})

        if self.bank is None:
            # Unknown bank: register a new record with no session or release
            self.bank = dict(
                name=self.name,
                current=None,
                sessions=[],
                production=[],
                properties=self.get_properties()
            )
            self.bank['_id'] = self.connector.set('banks', self.bank)

        self.session = None
        self.use_last_session = False

    def check(self):
        '''
        Checks bank configuration

        Delegates to the ``check`` method of the bank configuration object.

        :return: whatever ``self.config.check()`` returns
        '''
        return self.config.check()

    def is_locked(self):
        '''
        Tell whether a lock file currently exists for this bank.

        The lock file is ``<bank name>.lock`` in the ``lock.dir`` directory,
        which defaults to ``data.dir`` when it is not configured.

        :return: True if the lock file exists, False otherwise
        :rtype: bool
        '''
        fallback_dir = self.config.get('data.dir')
        lock_path = os.path.join(self.config.get('lock.dir', default=fallback_dir),
                                 self.name + '.lock')
        return os.path.exists(lock_path)

    def get_bank(self):
        '''
        Get bank stored in db

        NOTE(review): a second ``get_bank(self, bank, no_log=False)`` is
        defined further down in this class body. In Python the later
        definition replaces this one, so this zero-argument form cannot be
        reached through ``Bank.get_bank``; use ``self.bank`` directly.

        :return: bank json object
        '''
        return self.bank

    @staticmethod
    def get_banks_disk_usage():
        '''
        Report the disk usage of every bank, in total and per release.

        A production entry whose ``size`` is None is counted as 0.

        :return: list of dicts with keys ``name``, ``size`` (sum over the
                 releases) and ``releases`` (list of ``name``/``size`` dicts)
        '''
        if MongoConnector.db is None:
            global_config = BiomajConfig.global_config
            MongoConnector(global_config.get('GENERAL', 'db.url'),
                           global_config.get('GENERAL', 'db.name'))

        usage = []
        for bank_doc in MongoConnector.banks.find({}, {'name': 1, 'production': 1}):
            entry = {'name': bank_doc['name'], 'size': 0, 'releases': []}
            for prod in bank_doc['production']:
                if prod['size'] is None:
                    prod['size'] = 0
                release_size = prod['size']
                entry['size'] += release_size
                entry['releases'].append({'name': prod['release'], 'size': release_size})
            usage.append(entry)
        return usage

    def get_bank_release_info(self, full=False):
        '''
        Get release info for the bank. Used with --status option from biomaj-cli.py

        :param full: Display full for the bank
        :type full: Boolean
        :return: Dict with keys
                      if full=True
                           - info, prod, pend
                      else
                           - info

        With ``full=True`` each of ``info``, ``prod`` and ``pend`` is a list
        of rows whose first row is a header (``pend`` is empty when the bank
        has no pending release). Otherwise ``info`` is a single row:
        name, types, published remote release (``'N/A'`` if none) and
        visibility.
        '''
        _bank = self.bank
        date_format = '%Y-%m-%d %H:%M:%S'

        # Production entry of the currently published session, if any.
        # No break: the last matching entry wins.
        current_prod = None
        if 'current' in _bank and _bank['current']:
            for prod in _bank['production']:
                if _bank['current'] == prod['session']:
                    current_prod = prod

        if not full:
            release = 'N/A' if current_prod is None else current_prod['remoterelease']
            return {'info': [_bank['name'], ','.join(_bank['properties']['type']),
                             str(release), _bank['properties']['visibility']]}

        release = None if current_prod is None else current_prod['release']

        # A bank record created by __init__ has no 'last_update_session' key
        # until a session has been saved, so do not assume it is present.
        last_update = 'N/A'
        if 'last_update_session' in _bank and _bank['last_update_session'] is not None:
            last_update = datetime.fromtimestamp(_bank['last_update_session']).strftime(date_format)

        # Bank info header
        bank_info = [["Name", "Type(s)", "Last update status", "Published release"],
                     [_bank['name'],
                      str(','.join(_bank['properties']['type'])),
                      str(last_update),
                      str(release)]]

        # Bank production info header
        prod_info = [["Session", "Remote release", "Release", "Directory", "Freeze"]]
        for prod in _bank['production']:
            data_dir = self.config.get('data.dir')
            dir_version = self.config.get('dir.version')
            # save_session() stores these values under 'data_dir' and
            # 'dir_version'; the dotted spellings are still honoured in case
            # older entries carry them.
            for key in ('data.dir', 'data_dir'):
                if prod.get(key):
                    data_dir = prod[key]
            for key in ('dir.version', 'dir_version'):
                if prod.get(key):
                    dir_version = prod[key]
            release_dir = os.path.join(data_dir,
                                       dir_version,
                                       prod['prod_dir'])
            date = datetime.fromtimestamp(prod['session']).strftime(date_format)
            prod_info.append([date,
                              prod['remoterelease'],
                              prod['release'],
                              release_dir,
                              'yes' if 'freeze' in prod and prod['freeze'] else 'no'])

        # Bank pending info header
        pend_info = []
        if 'pending' in _bank and len(_bank['pending'].keys()) > 0:
            pend_info.append(["Pending release", "Last run"])
            for pending in _bank['pending'].keys():
                run = datetime.fromtimestamp(_bank['pending'][pending]).strftime(date_format)
                pend_info.append([pending, run])

        return {'info': bank_info, 'prod': prod_info, 'pend': pend_info}

    def update_dependencies(self):
        '''
        Run the update of the banks this bank depends on.

        Each bank that gets updated is appended to ``self.depends`` and its
        result is recorded in the session under ``depends``. Processing stops
        at the first dependency whose update returns a false value. Nothing
        is updated when ``self.run_depends`` is false.

        :return: status of updates
        '''
        self.depends = []
        dep_names = self.get_dependencies() if self.run_depends else []

        self.session.set('depends', {})
        for dep_name in dep_names:
            self.session._session['depends'][dep_name] = False

        status = True
        for dep_name in dep_names:
            if self.session._session['depends'][dep_name]:
                # Bank has been marked as depends multiple times, run only once
                logging.debug('Update:Depends:' + dep_name + ':SKIP')
                continue
            logging.info('Update:Depends:' + dep_name)
            dep_bank = Bank(dep_name)
            status = dep_bank.update()
            self.depends.append(dep_bank)
            self.session._session['depends'][dep_name] = status
            logging.info('Update:Depends:' + dep_name + ':' + str(status))
            if not status:
                break
        return status

    def get_bank(self, bank, no_log=False):
        '''
        Gets an other bank

        NOTE(review): this definition reuses the name of the zero-argument
        ``get_bank`` defined earlier in this class body and therefore
        replaces it.

        :param bank: name of the bank to load
        :param no_log: forwarded to the ``Bank`` constructor
        :return: a new ``Bank`` instance
        '''
        return Bank(bank, no_log=no_log)

    def get_dependencies(self, bank=None):
        '''
        Collect the names of all banks to update before this one.

        The comma separated ``depends`` property is read from *bank* when it
        is given, otherwise from this bank. The dependencies of each listed
        bank are placed in front of the list.

        :param bank: optional bank whose configuration is read instead
        :return: list of bank names to update
        '''
        source = self if bank is None else bank
        declared = source.config.get('depends')
        if declared is None:
            return []

        # Main deps
        direct = declared.split(',')
        resolved = direct
        # Now look for what the direct dependencies themselves depend on
        for dep_name in direct:
            resolved = Bank(dep_name, no_log=True).get_dependencies() + resolved
        return resolved

    def is_owner(self):
        '''
        Checks if current user is owner or admin

        The current user is read from the ``LOGNAME`` environment variable,
        falling back to ``USER`` when ``LOGNAME`` is not set. When neither is
        set the user cannot be identified and is not treated as an owner.

        :return: True if the current user is listed in the ``admin``
                 property or owns the bank, False otherwise
        :rtype: bool
        '''
        current_user = os.environ.get('LOGNAME')
        if current_user is None:
            current_user = os.environ.get('USER')
        if current_user is None:
            # Unknown identity: deny instead of failing with a KeyError
            return False

        admin_config = self.config.get('admin')
        if admin_config is not None:
            admin = [x.strip() for x in admin_config.split(',')]
            if current_user in admin:
                return True
        if current_user == self.bank['properties']['owner']:
            return True
        return False

    def set_owner(self, owner):
        '''
        Change the owner of the bank; allowed only when ``is_owner()`` is true.

        :param owner: new owner of the bank
        :raises Exception: if the current user is not authorized
        '''
        if not self.is_owner():
            message = 'Not authorized, bank owned by ' + self.bank['properties']['owner']
            logging.error(message)
            raise Exception(message)

        owner_change = {'$set': {'properties.owner': owner}}
        self.banks.update({'name': self.name}, owner_change)

    def set_visibility(self, visibility):
        '''
        Update bank visibility, only if current owner

        :param visibility: new visibility of the bank
        :raises Exception: if the current user is not authorized
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        # Address the single field with a dotted path, as set_owner does:
        # a $set on 'properties' itself replaces the whole sub-document and
        # would drop the owner, type and tags stored next to the visibility.
        self.banks.update({'name': self.name}, {'$set': {'properties.visibility': visibility}})

    def get_properties(self):
        '''
        Read bank properties from config file

        The owner already stored with the bank is kept. Only when the bank
        has no owner yet is the current user used, read from ``LOGNAME`` and
        falling back to ``USER``.

        :return: properties dict with keys ``visibility``, ``type``,
                 ``tags`` and ``owner``
        :raises KeyError: if an owner must be determined and neither
                          ``LOGNAME`` nor ``USER`` is set
        '''
        if self.bank and 'properties' in self.bank and 'owner' in self.bank['properties']:
            # Owner already set: keep it, no need to know the current user
            owner = self.bank['properties']['owner']
        else:
            owner = os.environ.get('LOGNAME')
            if owner is None:
                owner = os.environ['USER']

        props = {
            'visibility': self.config.get('visibility.default'),
            'type': self.config.get('db.type').split(','),
            'tags': [],
            'owner': owner
        }

        return props

    @staticmethod
    def searchindex(query):
        '''
        Forward *query* to ``BmajIndex.searchq`` and return its result
        unchanged.

        :param query: query passed as is to ``BmajIndex.searchq``
        '''
        return BmajIndex.searchq(query)

    @staticmethod
    def search(formats=None, types=None, with_sessions=True):
        '''
        Find the banks having production releases that match some formats
        and types.

        A production release is kept when it has at least one of *formats*
        and at least one of *types* (a type also matches when it is listed
        in the bank ``properties.type``). An empty or missing criterion
        matches every release. Releases that do not match are removed from
        the returned bank documents, and banks left without any release are
        not returned.

        :param formats: formats to look for
        :param types: types to look for
        :param with_sessions: when False, the ``sessions`` field is excluded
                              from the query
        :return: list of matching bank documents
        '''
        formats = [] if formats is None else formats
        types = [] if types is None else types

        if MongoConnector.db is None:
            global_config = BiomajConfig.global_config
            MongoConnector(global_config.get('GENERAL', 'db.url'),
                           global_config.get('GENERAL', 'db.name'))

        query = {}
        if formats:
            query['production.formats'] = {'$in': formats}
        if with_sessions:
            cursor = MongoConnector.banks.find(query)
        else:
            cursor = MongoConnector.banks.find(query, {'sessions': 0})

        # Now keep only the production releases the formats and types apply to
        matches = []
        for bank_doc in cursor:
            rejected = []
            for prod in bank_doc['production']:
                # Are formats present in this production release?
                has_format = not formats or any(
                    fmt in prod['formats'] for fmt in formats)
                # Are types present in this production release?
                # Only looked at once the format matched.
                has_type = not types or (has_format and any(
                    wanted in prod['types'] or wanted in bank_doc['properties']['type']
                    for wanted in types))
                if not (has_type and has_format):
                    rejected.append(prod)
            for prod in rejected:
                bank_doc['production'].remove(prod)
            if bank_doc['production']:
                matches.append(bank_doc)
        return matches

    @staticmethod
    def list(with_sessions=False):
        '''
        Return every bank document found in the database.

        :param with_sessions: should sessions be returned or not (can be quite big)
        :type with_sessions: bool
        :return: list of bank documents as returned by the query
        '''
        if MongoConnector.db is None:
            global_config = BiomajConfig.global_config
            MongoConnector(global_config.get('GENERAL', 'db.url'),
                           global_config.get('GENERAL', 'db.name'))

        if with_sessions:
            cursor = MongoConnector.banks.find({})
        else:
            # Leave the sessions field out of the result
            cursor = MongoConnector.banks.find({}, {'sessions': 0})
        return [bank_doc for bank_doc in cursor]

    def controls(self):
        '''
        Create the directories the bank needs when they do not exist yet:
        ``dir.version`` and ``offline.dir.name`` under ``data.dir``, and a
        directory named after the bank under ``log.dir``.
        '''
        def ensure_dir(path):
            if not os.path.exists(path):
                os.makedirs(path)

        data_dir = self.config.get('data.dir')
        ensure_dir(os.path.join(data_dir, self.config.get('dir.version')))
        ensure_dir(os.path.join(data_dir, self.config.get('offline.dir.name')))
        ensure_dir(os.path.join(self.config.get('log.dir'), self.name))

    def _delete(self):
        '''
        Delete bank from database, not files

        Removes the record whose ``_id`` matches this bank's ``_id`` through
        ``self.banks.remove``; this method does not touch anything on disk.
        '''
        self.banks.remove({'_id': self.bank['_id']})

    def save_session(self):
        '''
        Save session in database

        The session is pushed to the ``sessions`` of the bank record and the
        bank properties are refreshed. The ``download_files`` and ``files``
        lists of the session are written as JSON to files in ``cache.dir``
        and replaced by empty lists before the session is stored. An update
        session that is not over is recorded as pending for its release; an
        update session that is over and has its ``update`` flag set is
        recorded as a production release. ``self.bank`` is reloaded from the
        database at the end.
        '''
        self.session._session['last_update_time'] = time.time()
        self.session._session['log_file'] = self.config.log_file
        if self.use_last_session:
            # Remove last session
            self.banks.update({'name': self.name}, {'$pull': {'sessions': {'id': self.session._session['id']}}})

        # Bank field recording the id of the last session of this kind.
        # It stays None for any other action, in which case no such field
        # is written (instead of failing on an unbound name).
        action = None
        if self.session.get('action') == 'update':
            action = 'last_update_session'
        if self.session.get('action') == 'remove':
            action = 'last_remove_session'

        cache_dir = self.config.get('cache.dir')
        download_files = self.session.get('download_files')
        if download_files is not None:
            with open(os.path.join(cache_dir, 'files_' + str(self.session.get('id'))), 'w') as f_downloaded_files:
                f_downloaded_files.write(json.dumps(download_files))
            self.session.set('download_files', [])

        local_files = self.session.get('files')
        if local_files is not None:
            with open(os.path.join(cache_dir, 'local_files_' + str(self.session.get('id'))), 'w') as f_local_files:
                # Dump the local files themselves, not the downloaded ones
                f_local_files.write(json.dumps(local_files))
            self.session.set('files', [])

        # Insert session
        bank_updates = {'properties': self.get_properties()}
        if action is not None:
            bank_updates[action] = self.session._session['id']
        self.banks.update({'name': self.name}, {
            '$set': bank_updates,
            '$push': {'sessions': self.session._session}
        })
        BmajIndex.add(self.name, self.session._session)
        if self.session.get('action') == 'update' and not self.session.get_status(
                Workflow.FLOW_OVER) and self.session.get('release'):
            self.banks.update({'name': self.name},
                              {'$set': {'pending.' + self.session.get('release'): self.session._session['id']}})
        if self.session.get('action') == 'update' and self.session.get_status(Workflow.FLOW_OVER) and self.session.get(
                'update'):
            # We expect that a production release has reached the FLOW_OVER status.
            # If no update is needed (same release etc...), the *update* session of the session is set to False
            logging.debug('Bank:Save:' + self.name)
            if len(self.bank['production']) > 0:
                # Remove from database any production entry of the same release
                self.banks.update({'name': self.name},
                                  {'$pull': {'production': {'release': self.session._session['release']}}})

            # Types: configured ones plus those declared by the release files
            release_types = []
            if self.config.get('db.type'):
                release_types = self.config.get('db.type').split(',')
            # Formats: those of the session plus the configured ones
            release_formats = list(self.session._session['formats'].keys())
            if self.config.get('db.formats'):
                config_formats = self.config.get('db.formats').split(',')
                for config_format in config_formats:
                    if config_format not in release_formats:
                        release_formats.append(config_format)

            for release_format in self.session._session['formats']:
                for release_files in self.session._session['formats'][release_format]:
                    if release_files['types']:
                        for rtype in release_files['types']:
                            if rtype not in release_types:
                                release_types.append(rtype)
            prod_dir = self.session.get_release_directory()
            if self.session.get('prod_dir'):
                prod_dir = self.session.get('prod_dir')
            production = {'release': self.session.get('release'),
                          'remoterelease': self.session.get('remoterelease'),
                          'session': self.session._session['id'],
                          'formats': release_formats,
                          'types': release_types,
                          'size': self.session.get('fullsize'),
                          'data_dir': self.session._session['data_dir'],
                          'dir_version': self.session._session['dir_version'],
                          'prod_dir': prod_dir,
                          'freeze': False}
            self.bank['production'].append(production)

            # Publish the release and clear its pending marker in one update
            self.banks.update({'name': self.name},
                              {'$push': {'production': production},
                               '$unset': {'pending.' + self.session.get('release'): ''}
                               })

        self.bank = self.banks.find_one({'name': self.name})

    def clean_old_sessions(self):
        '''
        Delete old sessions, not latest ones nor related to production sessions

        A session is kept when it is the current session, when the current session
        references it as ``last_update_session`` or ``last_remove_session``, or when
        a production entry references it. Every other session is pulled from the bank
        record, its pending entry (if any) is unset and, when its release is not used
        by a kept session, its release directory is deleted from disk.

        Does nothing when no session is loaded, when the bank record has no
        ``sessions`` field or when ``keep.old.sessions`` is enabled.
        '''
        if self.session is None:
            return
        # No previous session
        if 'sessions' not in self.bank:
            return
        if self.config.get_bool('keep.old.sessions'):
            logging.debug('keep old sessions, skipping...')
            return

        old_sessions = []
        # Releases used by sessions that are kept, their directories must survive
        prod_releases = []
        for session in self.bank['sessions']:
            if session['id'] == self.session.get('id'):
                # Current session
                prod_releases.append(session['release'])
                continue
            if session['id'] == self.session.get('last_update_session'):
                prod_releases.append(session['release'])
                continue
            if session['id'] == self.session.get('last_remove_session'):
                continue
            if any(session['id'] == prod['session'] for prod in self.bank['production']):
                prod_releases.append(session['release'])
                continue
            old_sessions.append(session)

        if not old_sessions:
            return

        # The bank record may have no 'pending' field at all
        pending = self.bank.get('pending') or {}
        for session in old_sessions:
            session_id = session['id']
            self.banks.update({'name': self.name}, {'$pull': {'sessions': {'id': session_id}}})
            # Check if in pending sessions
            for rel, rel_session in list(pending.items()):
                if rel_session == session_id:
                    # $unset works on field paths: target only this pending entry,
                    # naming 'pending' itself would drop every pending release
                    self.banks.update({'name': self.name}, {'$unset': {'pending.' + str(rel): ''}})
            if session['release'] not in prod_releases and session['release'] != self.session.get('release'):
                # There might be unfinished releases linked to session, delete them
                # if they are not related to a production directory or latest run
                release_name = self.name + self.config.get('release.separator', default='_') + str(session['release'])
                session_dir = os.path.join(self.config.get('data.dir'),
                                           self.config.get('dir.version'),
                                           release_name)
                if os.path.exists(session_dir):
                    logging.info('Bank:DeleteOldSessionDir:' + release_name)
                    shutil.rmtree(session_dir)
        # Reload the record to reflect the removals
        self.bank = self.banks.find_one({'name': self.name})

    def publish(self):
        '''
        Set session release to *current*

        Replaces the ``current`` symbolic link of the bank data directory with a
        link to the release directory of the loaded session, then records the
        session id as ``current`` in memory and in the database.

        :raises Exception: if the bank is owned by another user
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        current_link = os.path.join(self.config.get('data.dir'),
                                    self.config.get('dir.version'),
                                    'current')

        if os.path.lexists(current_link):
            os.remove(current_link)
        # The link target is stored as given and resolved against the directory
        # holding the link, so the link is created by its full path and the
        # working directory of the process is left untouched
        os.symlink(self.session.get_release_directory(), current_link)
        self.bank['current'] = self.session._session['id']
        self.banks.update({'name': self.name},
                          {
                              '$set': {'current': self.session._session['id']}
                          })

    def unpublish(self):
        '''
        Unset *current*

        Removes the ``current`` link of the bank data directory when present and
        resets the ``current`` field in memory and in the database.

        :raises Exception: if the bank is owned by another user
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        current_link = os.path.join(self.config.get('data.dir'),
                                    self.config.get('dir.version'),
                                    'current')

        if os.path.lexists(current_link):
            os.remove(current_link)
        # Keep the in-memory record in sync with the database, as publish does
        self.bank['current'] = None
        self.banks.update({'name': self.name},
                          {
                              '$set': {'current': None}
                          })

    def get_production(self, release):
        '''
        Look up the production entry of a release

        When several production entries match, the last one in the list is returned.

        :param release: release name or production dir name
        :type release: str
        :return: production field, None when no entry matches
        '''
        wanted = str(release)
        candidates = [entry for entry in self.bank['production']
                      if entry['release'] == wanted or entry['prod_dir'] == wanted]
        if not candidates:
            return None
        return candidates[-1]

    def freeze(self, release):
        '''
        Freeze a production release

        When freezed, a production release cannot be removed (manually or automatically)

        :param release: release name or production dir name
        :type release: str
        :return: bool, False when no production entry matches the release
        :raises Exception: if the bank is owned by another user
        '''
        release = str(release)
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        # Release names of the production entries matching by release or directory
        matches = [prod['release'] for prod in self.bank['production']
                   if prod['release'] == release or prod['prod_dir'] == release]
        if not matches:
            # Nothing to freeze: report the failure instead of updating the db
            logging.error('Release not found: ' + release)
            return False
        # As before, the last matching entry gives the release to update
        rel = matches[-1]
        self.banks.update({'name': self.name, 'production.release': rel}, {'$set': {'production.$.freeze': True}})
        self.bank = self.banks.find_one({'name': self.name})
        return True

    def unfreeze(self, release):
        '''
        Unfreeze a production release to allow removal

        :param release: release name or production dir name
        :type release: str
        :return: bool, False when no production entry matches the release
        :raises Exception: if the bank is owned by another user
        '''
        release = str(release)
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        # Release names of the production entries matching by release or directory
        matches = [prod['release'] for prod in self.bank['production']
                   if prod['release'] == release or prod['prod_dir'] == release]
        if not matches:
            # Nothing to unfreeze: report the failure instead of updating the db
            logging.error('Release not found: ' + release)
            return False
        # As before, the last matching entry gives the release to update
        rel = matches[-1]
        self.banks.update({'name': self.name, 'production.release': rel}, {'$set': {'production.$.freeze': False}})
        self.bank = self.banks.find_one({'name': self.name})
        return True

    def get_new_session(self, flow=None):
        '''
        Build a new session for this bank

        :param flow: kind of workflow, defaults to Workflow.FLOW when not given
        :type flow: :func:`biomaj.workflow.Workflow.FLOW`
        :return: :class:`biomaj.session.Session`
        '''
        selected_flow = Workflow.FLOW if flow is None else flow
        return Session(self.name, self.config, selected_flow)

    def get_session_from_release(self, release):
        '''
        Find the stored session of a specific release

        The first production entry matching the release gives the session id to
        look for. When this gives no session, the last stored session whose release
        is a suffix of the requested name is used instead.

        :param release: release name or production dir
        :type release: str
        :return: stored session record, None (with an error logged) when not found
        '''
        wanted = str(release)
        # First production entry matching by release name or production directory
        prod_entry = next((entry for entry in self.bank['production']
                           if entry['release'] == wanted or entry['prod_dir'] == wanted),
                          None)
        found = None
        if prod_entry is not None:
            # Session related to this production release
            found = next((stored for stored in self.bank['sessions']
                          if stored['id'] == prod_entry['session']),
                         None)
        if found is None:
            # No prod session, the session may have failed or been stopped:
            # keep the last session whose release ends the requested name
            for stored in self.bank['sessions']:
                if stored['release'] and wanted.endswith(stored['release']):
                    found = stored
        if found is None:
            logging.error('No production session could be found for this release')
        return found

    def load_session(self, flow=None, session=None):
        '''
        Set self.session to a new session, or to a previous one that is reloaded

        When *session* is given, it is loaded as is. Otherwise a new session is
        created; for an update with a recorded ``last_update_session``, that
        session is reloaded unless it is over and no start task was requested.

        :param flow: kind of workflow, defaults to Workflow.FLOW when not given
        :type flow: :func:`biomaj.workflow.Workflow.FLOW`
        :param session: stored session record to load instead of the last one
        '''
        if flow is None:
            flow = Workflow.FLOW

        if session is not None:
            logging.debug('Load specified session ' + str(session['id']))
            self.session = Session(self.name, self.config, flow)
            self.session.load(session)
            self.use_last_session = True
            return

        start_fresh = len(self.bank['sessions']) == 0 or self.options.get_option(Options.FROMSCRATCH)
        self.session = Session(self.name, self.config, flow)
        if start_fresh:
            logging.debug('Start new session')
            return

        # Load previous session for updates only
        if self.session.get('action') != 'update':
            return
        if 'last_update_session' not in self.bank or not self.bank['last_update_session']:
            return

        last_update_id = self.bank['last_update_session']
        previous = next((stored for stored in self.bank['sessions'] if stored['id'] == last_update_id),
                        None)
        if previous is None:
            # Recorded session is not stored anymore, keep the new session
            return

        self.session.load(previous)
        is_over = self.session.get_status(Workflow.FLOW_OVER)
        if is_over and self.options.get_option(Options.FROM_TASK) is None:
            # Previous run is complete: start again, remembering its remote release
            previous_release = self.session.get('remoterelease')
            self.session = Session(self.name, self.config, flow)
            self.session.set('previous_release', previous_release)
            logging.debug('Start new session')
            return

        logging.debug('Load previous session ' + str(self.session.get('id')))
        self.use_last_session = True

    def remove_session(self, sid):
        '''
        Delete a session from db

        Also removes the cached file lists of the session, its production entry
        and its pending entry. When ``keep.old.sessions`` is enabled the session
        record is kept and only stamped with a ``deleted`` time.

        :param sid: id of the session
        :type sid: long
        :return: bool
        '''
        # Release of the session, read from a fresh copy of the bank record
        session_release = None
        stored_bank = self.banks.find_one({'name': self.name})
        for stored in stored_bank['sessions']:
            if stored['id'] == sid:
                session_release = stored['release']

        # Drop the cached file lists of this session
        cache_dir = self.config.get('cache.dir')
        for prefix in ('files_', 'local_files_'):
            cached_list = os.path.join(cache_dir, prefix + str(sid))
            if os.path.exists(cached_list):
                os.remove(cached_list)

        keep_sessions = self.config.get_bool('keep.old.sessions')
        if keep_sessions:
            logging.debug('keep old sessions')
            pulled = {'production': {'session': sid}}
        else:
            pulled = {'sessions': {'id': sid},
                      'production': {'session': sid}}
        changes = {'$pull': pulled}
        if session_release is not None:
            changes['$unset'] = {'pending.' + session_release: ''}
        self.banks.update({'name': self.name}, changes)
        if keep_sessions:
            # Session record is kept, flag it as deleted
            self.banks.update({'name': self.name, 'sessions.id': sid},
                              {'$set': {'sessions.$.deleted': time.time()}})

        # Update object
        self.bank = self.banks.find_one({'name': self.name})
        if session_release is not None:
            BmajIndex.remove(self.name, session_release)
        return True

    def get_data_dir(self):
        '''
        Build the path of the bank data directory

        The path joins the ``data.dir`` and ``dir.version`` configuration values.

        :return: str
        '''
        data_root = self.config.get('data.dir')
        version_dir = self.config.get('dir.version')
        return os.path.join(data_root, version_dir)

    def removeAll(self, force=False):
        '''
        Remove all bank releases and database records

        Deletes the bank record, its index entries, the bank data directory and
        the offline directory. Nothing is removed when a production release is
        freezed, unless *force* is set.

        :param force: force removal even if some production dirs are freezed
        :type force: bool
        :return: bool, False when removal is refused because of a freezed release
        '''
        if not force:
            has_freeze = any('freeze' in prod and prod['freeze']
                             for prod in self.bank['production'])
            if has_freeze:
                logging.error('Cannot remove bank, some production directories are freezed, use force if needed')
                return False

        self.banks.remove({'name': self.name})
        BmajIndex.delete_all_bank(self.name)
        bank_data_dir = self.get_data_dir()
        # logging.warn is a deprecated alias of logging.warning
        logging.warning('DELETE ' + bank_data_dir)
        if os.path.exists(bank_data_dir):
            shutil.rmtree(bank_data_dir)
        bank_offline_dir = os.path.join(self.config.get('data.dir'), self.config.get('offline.dir.name'))
        if os.path.exists(bank_offline_dir):
            shutil.rmtree(bank_offline_dir)
        bank_log_dir = os.path.join(self.config.get('log.dir'), self.name)
        # NOTE(review): the log directory is only deleted when the bank was
        # created with no_log set; confirm this condition is the intended one
        if os.path.exists(bank_log_dir) and self.no_log:
            shutil.rmtree(bank_log_dir)
        return True

    def get_status(self):
        '''
        Get status of current workflow

        :return: dict of current workflow status, empty when the bank record
                 has no status or a null status
        '''
        # A bank record may have no 'status' field at all
        status = self.bank.get('status')
        if status is None:
            return {}
        return status

    def remove_pending(self, release):
        '''
        Remove pending releases

        Every pending release of the bank is removed (release directory and
        session), then the ``pending`` field is reset in the database.

        :param release: release or release directory; not used to select the
                        pending releases, all of them are removed
        :type release: str
        :return: bool
        :raises Exception: if the bank is owned by another user
        '''
        logging.warning('Bank:' + self.name + ':RemovePending')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        # A bank record may have no 'pending' field at all
        if not self.bank.get('pending'):
            return True
        pendings = self.bank['pending']
        # Iterate over a copy of the keys, remove_session reloads self.bank
        for pending_release in list(pendings.keys()):
            pending_session_id = pendings[pending_release]
            pending_session = None
            for s in self.bank['sessions']:
                if s['id'] == pending_session_id:
                    pending_session = s
                    break
            session = Session(self.name, self.config, RemoveWorkflow.FLOW)
            if pending_session is None:
                # No stored session, only the release name is known
                session._session['release'] = pending_release
            else:
                session.load(pending_session)
            if os.path.exists(session.get_full_release_directory()):
                logging.debug("Remove:Pending:Dir:" + session.get_full_release_directory())
                shutil.rmtree(session.get_full_release_directory())
            self.remove_session(pending_session_id)
        self.banks.update({'name': self.name}, {'$set': {'pending': {}}})
        return True

    def remove(self, release):
        '''
        Remove a release (db and files)

        The release must be in production, not freezed and not the published
        (*current*) one. The removal itself is run by a removal workflow.

        :param release: release or release directory
        :type release: str
        :return: bool, False when the release cannot be removed, else the
                 result of the removal workflow
        :raises Exception: if the bank is owned by another user
        '''
        wanted = str(release)
        logging.warning('Bank:' + self.name + ':Remove')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        self.session = self.get_new_session(RemoveWorkflow.FLOW)

        # First production entry matching by release name or production directory
        prod_entry = next((entry for entry in self.bank['production']
                           if entry['release'] == wanted or entry['prod_dir'] == wanted),
                          None)
        oldsession = None
        if prod_entry is not None:
            if 'freeze' in prod_entry and prod_entry['freeze']:
                logging.error('Cannot remove release, release is freezed, unfreeze it first')
                return False
            # Session related to this production release
            oldsession = next((stored for stored in self.bank['sessions']
                               if stored['id'] == prod_entry['session']),
                              None)
        if oldsession is None:
            logging.error('No production session could be found for this release')
            return False
        if 'current' in self.bank and self.bank['current'] == oldsession['id']:
            logging.error('This release is the release in the main release production, you should first unpublish it')
            return False

        # New empty session for removal, pointing to the session to remove
        removal = Session(self.name, self.config, RemoveWorkflow.FLOW)
        removal.set('action', 'remove')
        removal.set('release', oldsession['release'])
        removal.set('update_session_id', oldsession['id'])
        self.session = removal

        res = self.start_remove(removal)
        self.session.set('workflow_status', res)
        self.save_session()
        return res

    def update(self, depends=False):
        '''
        Launch a bank update

        Loads the session of the ``release`` option when set, else the latest
        one. With the ``from_task`` option, the status of that task and of the
        following ones is reset so that they are run again.

        :param depends: run update of bank dependencies first
        :type depends: bool
        :return: bool, False when the requested release or start task cannot be
                 used, else the result of the update workflow
        :raises Exception: if the bank is owned by another user
        '''
        logging.warning('Bank:' + self.name + ':Update')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' + self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' + self.bank['properties']['owner'])

        self.run_depends = depends
        self.controls()

        wanted_release = self.options.get_option('release')
        if not wanted_release:
            logging.info('Bank:' + self.name + ':Release:latest')
            self.load_session(UpdateWorkflow.FLOW)
        else:
            logging.info('Bank:' + self.name + ':Release:' + wanted_release)
            release_session = self.get_session_from_release(wanted_release)
            # No session in prod
            if release_session is None:
                logging.error('Release does not exists: ' + wanted_release)
                return False
            self.load_session(UpdateWorkflow.FLOW, release_session)

        # if from task, reset workflow status in session.
        from_task = self.options.get_option('from_task')
        if from_task:
            restarting = False
            for task in self.session.flow:
                task_name = task['name']
                # A task before from_task in KO status forbids restarting after it
                if not restarting and not self.session.get_status(task_name) and task_name != from_task:
                    logging.error(
                        'Previous task ' + task_name + ' was not successful, cannot restart after this task')
                    return False
                if task_name == from_task:
                    restarting = True
                if not restarting:
                    continue
                # From from_task on, tasks must be set to False to be run
                self.session.set_status(task_name, False)
                if task_name in (Workflow.FLOW_POSTPROCESS, Workflow.FLOW_PREPROCESS,
                                 Workflow.FLOW_REMOVEPROCESS):
                    self.session.reset_proc(task_name, self.options.get_option('process'))

        self.session.set('action', 'update')
        res = self.start_update()
        self.session.set('workflow_status', res)
        self.save_session()
        return res

    def start_remove(self, session):
        '''
        Run a removal workflow for this bank

        :param session: Session to remove
        :type session: :class:`biomaj.session.Session`
        :return: bool, result of the workflow start
        '''
        return RemoveWorkflow(self, session).start()

    def start_update(self):
        '''
        Run an update workflow for this bank

        :return: result of the workflow start
        '''
        return UpdateWorkflow(self).start()
예제 #13
0
    def migrate_pendings():
        """
        Migrate database

        3.0.18: Check the stored schema version and, if it is 3.0.17 or older, do the 'pending' key migration

        Old style ``pending`` fields map a release name to a session id. They are
        rewritten as a list of ``{'release': ..., 'id': ...}`` entries ordered by
        session id. The stored schema version is then set to the installed
        BioMAJ version.

        :return: None
        """
        if BiomajConfig.global_config is None:
            try:
                BiomajConfig.load_config()
            except Exception:
                print("* SchemaVersion: Can't find config file")
                return None
        if MongoConnector.db is None:
            MongoConnector(
                BiomajConfig.global_config.get('GENERAL', 'db.url'),
                BiomajConfig.global_config.get('GENERAL', 'db.name'))

        schema = MongoConnector.db_schema
        banks = MongoConnector.banks

        schema_version = schema.find_one({'id': 1})
        installed_version = pkg_resources.get_distribution("biomaj").version
        if schema_version is None:
            # No recorded version yet, start from the oldest schema
            schema_version = {'id': 1, 'version': '3.0.0'}
            schema.insert(schema_version)

        version_parts = schema_version['version'].split('.')
        moderate = int(version_parts[1])
        minor = int(version_parts[2])

        if moderate == 0 and minor <= 17:
            print("Migrate from release: %s" % schema_version['version'])
            # Update pending releases
            updated = 0
            for bank in banks.find():
                if 'pending' not in bank:
                    continue
                old_pending = bank['pending']
                # Check we have an old pending type
                if isinstance(old_pending, dict):
                    updated += 1
                    pendings = [{'release': str(release), 'id': old_pending[release]}
                                for release in sorted(old_pending, key=old_pending.get)]
                    # Write the converted list once, when complete, instead of
                    # once per release while it is being built
                    if pendings:
                        banks.update({'name': bank['name']},
                                     {'$set': {
                                         'pending': pendings
                                     }})
                else:
                    # We remove old type for 'pending'
                    # NOTE(review): this branch unsets any 'pending' value that
                    # is not a dict, while an empty dict is left as is; confirm
                    # this is the intended split
                    banks.update({'name': bank['name']},
                                 {'$unset': {
                                     'pending': ""
                                 }})

            print("Migration: %d bank(s) updated" % updated)
        schema.update_one({'id': 1}, {'$set': {'version': installed_version}})
예제 #14
0
def main():
    """
    Command line entry point: parse the arguments and run the requested action.

    Actions are evaluated in a fixed order (maintenance, owner, visibility,
    move production directories, rename, search, show, check, status,
    status-ko, update, freeze, unfreeze, remove, remove-pending, unpublish,
    publish). Several of them terminate the process with ``sys.exit`` once
    done; the others fall through to the following checks.

    The process exits with status 1 on invalid arguments, on a failed action
    or when an unexpected exception is raised while running an action.
    """

    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-c', '--config', dest="config",help="Configuration file")
    parser.add_argument('--check', dest="check", help="Check bank property file", action="store_true", default=False)
    parser.add_argument('-u', '--update', dest="update", help="Update action", action="store_true", default=False)
    parser.add_argument('--fromscratch', dest="fromscratch", help="Force a new cycle update", action="store_true", default=False)
    parser.add_argument('-z', '--from-scratch', dest="fromscratch", help="Force a new cycle update", action="store_true", default=False)
    parser.add_argument('-p', '--publish', dest="publish", help="Publish", action="store_true", default=False)
    parser.add_argument('--unpublish', dest="unpublish", help="Unpublish", action="store_true", default=False)

    parser.add_argument('--release', dest="release", help="release of the bank")
    parser.add_argument('--from-task', dest="from_task", help="Start cycle at a specific task (init always executed)")
    parser.add_argument('--process', dest="process", help="Linked to from-task, optionally specify a block, meta or process name to start from")
    parser.add_argument('-l', '--log', dest="log", help="log level")
    parser.add_argument('-r', '--remove', dest="remove", help="Remove a bank release", action="store_true", default=False)
    parser.add_argument('--remove-all', dest="removeall", help="Remove all bank releases and database records", action="store_true", default=False)
    parser.add_argument('--remove-pending', dest="removepending", help="Remove pending release", action="store_true", default=False)
    parser.add_argument('-s', '--status', dest="status", help="Get status", action="store_true", default=False)
    parser.add_argument('-b', '--bank', dest="bank", help="bank name")
    parser.add_argument('--owner', dest="owner", help="change owner of the bank")
    parser.add_argument('--stop-before', dest="stop_before", help="Store workflow before task")
    parser.add_argument('--stop-after', dest="stop_after", help="Store workflow after task")
    parser.add_argument('--freeze', dest="freeze", help="Freeze a bank release", action="store_true", default=False)
    parser.add_argument('--unfreeze', dest="unfreeze", help="Unfreeze a bank release", action="store_true", default=False)
    parser.add_argument('-f', '--force', dest="force", help="Force action", action="store_true", default=False)
    # argparse built-in help is disabled (add_help=False): -h prints the custom usage text below
    parser.add_argument('-h', '--help', dest="help", help="Show usage", action="store_true", default=False)

    parser.add_argument('--search', dest="search", help="Search by format and types", action="store_true", default=False)
    parser.add_argument('--formats', dest="formats", help="List of formats to search, comma separated")
    parser.add_argument('--types', dest="types", help="List of types to search, comma separated")
    parser.add_argument('--query', dest="query", help="Lucene query syntax to search in index")

    parser.add_argument('--show', dest="show", help="Show format files for selected bank", action="store_true", default=False)

    parser.add_argument('-n', '--change-dbname', dest="newbank", help="Change old bank name to this new bank name")
    parser.add_argument('-e', '--move-production-directories', dest="newdir",help="Change bank production directories location to this new path, path must exists")
    parser.add_argument('--visibility', dest="visibility", help="visibility status of the bank")

    parser.add_argument('--maintenance', dest="maintenance", help="Maintenance mode (on/off/status)")

    parser.add_argument('--version', dest="version", help="Show version", action="store_true", default=False)
    parser.add_argument('--status-ko', dest="statusko", help="Get bank in KO status", action="store_true", default=False)


    # Parsed values are stored directly as attributes of the Options instance
    options = Options()
    parser.parse_args(namespace=options)

    options.no_log = False

    if options.help:
        print('''
    --config: global.properties file path

    --status: list of banks with published release
        [OPTIONAL]
        --bank xx / bank: Get status details of bank

    --status-ko: list of banks in error status (last run)

    --log DEBUG|INFO|WARN|ERR  [OPTIONAL]: set log level in logs for this run, default is set in global.properties file

    --check: Check bank property file
        [MANDATORY]
        --bank xx: name of the bank to check (will check xx.properties)

    --owner yy: Change owner of the bank (user id)
        [MANDATORY]
        --bank xx: name of the bank

    --visibility public|private: change visibility public/private of a bank
        [MANDATORY]
        --bank xx: name of the bank

    --change-dbname yy: Change name of the bank to this new name
        [MANDATORY]
        --bank xx: current name of the bank

    --move-production-directories yy: Change bank production directories location to this new path, path must exists
        [MANDATORY]
        --bank xx: current name of the bank

    --update: Update bank
        [MANDATORY]
        --bank xx: name of the bank(s) to update, comma separated
        [OPTIONAL]
        --publish: after update set as *current* version
        --from-scratch: force a new update cycle, even if release is identical, release will be incremented like (myrel_1)
        --stop-before xx: stop update cycle before the start of step xx
        --stop-after xx: stop update cycle after step xx has completed
        --from-task xx --release yy: Force an re-update cycle for bank release *yy* or from current cycle (in production directories), skipping steps up to *xx*
        --process xx: linked to from-task, optionally specify a block, meta or process name to start from
        --release xx: release to update

    --publish: Publish bank as current release to use
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to publish
    --unpublish: Unpublish bank (remove current)
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove-all: Remove all bank releases and database records
        [MANDATORY]
        --bank xx: name of the bank to update
        [OPTIONAL]
        --force: remove freezed releases

    --remove-pending: Remove pending releases
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove: Remove bank release (files and database release)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

        Release must not be the *current* version. If this is the case, publish a new release before.

    --freeze: Freeze bank release (cannot be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --unfreeze: Unfreeze bank release (can be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --search: basic search in bank production releases, return list of banks
       --formats xx,yy : list of comma separated format
      AND/OR
       --types xx,yy : list of comma separated type

       --query "LUCENE query syntax": search in index (if activated)

    --show: Show bank files per format
      [MANDATORY]
      --bank xx: name of the bank to show
      [OPTIONAL]
      --release xx: release of the bank to show

    --maintenance on/off/status: (un)set biomaj in maintenance mode to prevent updates/removal

        ''')
        return

    if options.version:
        version = pkg_resources.require('biomaj')[0].version
        print('Version: '+str(version))
        return

    # Step names given on the command line must exist in the update or remove workflow
    requested_steps = [options.stop_after, options.stop_before, options.from_task]
    if any(requested_steps):
        available_steps = [flow['name'] for flow in UpdateWorkflow.FLOW]
        available_steps.extend(flow['name'] for flow in RemoveWorkflow.FLOW)
        for step in requested_steps:
            if step and step not in available_steps:
                print('Invalid step: '+step)
                sys.exit(1)

    bmaj = None
    try:
        if options.config is not None:
            BiomajConfig.load_config(options.config)
        else:
            BiomajConfig.load_config()
    except Exception as e:
        print(str(e))
        sys.exit(1)

    try:

        if options.maintenance:
            if options.maintenance not in ['on', 'off', 'status']:
                print("Wrong maintenance value [on,off,status]")
                sys.exit(1)
            data_dir = BiomajConfig.global_config.get('GENERAL', 'data.dir')
            # The lock file lives in lock.dir when configured, in data.dir otherwise
            if BiomajConfig.global_config.has_option('GENERAL', 'lock.dir'):
                lock_dir = BiomajConfig.global_config.get('GENERAL', 'lock.dir')
            else:
                lock_dir = data_dir
            maintenance_lock_file = os.path.join(lock_dir,'biomaj.lock')
            if options.maintenance == 'status':
                if os.path.exists(maintenance_lock_file):
                    print("Maintenance: On")
                else:
                    print("Maintenance: Off")
                sys.exit(0)
            if options.maintenance == 'on':
                # Context manager closes the lock file even if the write fails
                with open(maintenance_lock_file, 'w') as f:
                    f.write('1')
                print("Maintenance set to On")
                sys.exit(0)
            if options.maintenance == 'off':
                if os.path.exists(maintenance_lock_file):
                    os.remove(maintenance_lock_file)
                print("Maintenance set to Off")
                sys.exit(0)

        if options.owner:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_owner(options.owner)
            sys.exit(0)

        if options.visibility:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if options.visibility not in ['public', 'private']:
                print("Valid values are public|private")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_visibility(options.visibility)
            print("Do not forget to update accordingly the visibility.default parameter in the configuration file")
            sys.exit(0)

        if options.newdir:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if not os.path.exists(options.newdir):
                print("Destination directory does not exists")
                # Abort: moving to a missing destination would rename the first
                # release directory to that path instead of moving it inside
                sys.exit(1)
            bank = Bank(options.bank, options=options, no_log=True)
            if not bank.bank['production']:
                print("Nothing to move, no production directory")
                sys.exit(0)
            bank.load_session(Workflow.FLOW, None)
            w = Workflow(bank)
            res = w.wf_init()
            if not res:
                sys.exit(1)
            for prod in bank.bank['production']:
                session = bank.get_session_from_release(prod['release'])
                bank.load_session(Workflow.FLOW, session)
                prod_path = bank.session.get_full_release_directory()
                if os.path.exists(prod_path):
                    shutil.move(prod_path, options.newdir)
                prod['data_dir'] = options.newdir
            bank.banks.update({'name': options.bank}, {'$set' : { 'production': bank.bank['production'] }})
            print("Bank production directories moved to " + options.newdir)
            print("WARNING: do not forget to update accordingly the data.dir and dir.version properties")
            w.wf_over()
            sys.exit(0)

        if options.newbank:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            conf_dir = BiomajConfig.global_config.get('GENERAL', 'conf.dir')
            bank_prop_file = os.path.join(conf_dir,options.bank+'.properties')
            # SafeConfigParser no longer exists in recent Python releases:
            # keep using it when available, fall back to ConfigParser otherwise
            parser_class = getattr(configparser, 'SafeConfigParser', configparser.ConfigParser)
            config_bank = parser_class()
            config_bank.read([bank_prop_file])
            config_bank.set('GENERAL', 'db.name', options.newbank)
            # Write the properties under the new name, then rename in db and drop the old file
            with open(os.path.join(conf_dir,options.newbank+'.properties'),'w') as newbank_prop_file:
                config_bank.write(newbank_prop_file)
            bank.banks.update({'name': options.bank}, {'$set' : { 'name': options.newbank }})
            os.remove(bank_prop_file)
            print("Bank "+options.bank+" renamed to "+options.newbank)
            sys.exit(0)

        if options.search:
            if options.query:
                res = Bank.searchindex(options.query)
                print("Query matches for :"+options.query)
                results = [["Release", "Format(s)", "Type(s)", "Files"]]
                for match in res:
                    results.append([match['_source']['release'],
                                    str(match['_source']['format']),
                                    str(match['_source']['types']),
                                    ','.join(match['_source']['files'])])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
                # No exit here: execution continues with the remaining option checks
            else:
                formats = []
                if options.formats:
                    formats = options.formats.split(',')
                types = []
                if options.types:
                    types = options.types.split(',')
                print("Search by formats="+str(formats)+", types="+str(types))
                res = Bank.search(formats, types, False)
                results = [["Name", "Release", "Format(s)", "Type(s)", 'Current']]
                for bank in sorted(res, key=lambda bank: (bank['name'])):
                    # Bank name is only printed on the first row of each bank
                    b = bank['name']
                    bank['production'].sort(key=lambda n: n['release'], reverse=True)
                    for prod in bank['production']:
                        iscurrent = ""
                        if prod['session'] == bank['current']:
                            iscurrent = "yes"
                        results.append([b if b else '', prod['release'], ','.join(prod['formats']),
                                        ','.join(prod['types']), iscurrent])
                        b = None
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
                sys.exit(0)

        if options.show:
            if not options.bank:
                print("Bank option is required")
                sys.exit(1)

            bank = Bank(options.bank, no_log=True)
            results = [["Name", "Release", "Format(s)", "Type(s)", "Tag(s)", "File(s)"]]
            current = None
            if 'current' in bank.bank and bank.bank['current']:
                current = bank.bank['current']
            for prod in bank.bank['production']:
                release = prod['release']
                if current == prod['session']:
                    release += ' (current)'
                # --release may match either the release name or the production directory
                if options.release and (prod['release'] != options.release and prod['prod_dir'] != options.release):
                    continue
                session = bank.get_session_from_release(prod['release'])
                formats = session['formats']
                afiles = []
                atags = []
                atypes = []
                for fformat in formats:
                    for elt in formats[fformat]:
                        atypes.append(','.join(elt['types']))
                        atags.extend(elt['tags'].values())
                        afiles.extend(elt['files'])
                # List every format of the release, like the other aggregated columns
                results.append([bank.bank['name'], release, ','.join(formats), ','.join(atypes),
                                ','.join(atags), ','.join(afiles)])
            print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        if options.check:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            print(options.bank+" check: "+str(bank.check())+"\n")
            sys.exit(0)

        if options.status:
            if options.bank:
                bank = Bank(options.bank, no_log=True)
                info = bank.get_bank_release_info(full=True)
                print(tabulate(info['info'], headers='firstrow', tablefmt='psql'))
                print(tabulate(info['prod'], headers='firstrow', tablefmt='psql'))
                # do we have some pending release(s)
                if 'pend' in info and len(info['pend']) > 1:
                    print(tabulate(info['pend'], headers='firstrow', tablefmt='psql'))
            else:
                banks = Bank.list()
                # Headers of output table
                banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
                for bank in sorted(banks, key=lambda k: k['name']):
                    bank = Bank(bank['name'], no_log=True)
                    banks_list.append(bank.get_bank_release_info()['info'])
                print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))
            sys.exit(0)

        if options.statusko:
            banks = Bank.list()
            banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
            for bank in sorted(banks, key=lambda k: k['name']):
                # A failure on one bank is reported and does not stop the listing
                try:
                    bank = Bank(bank['name'], no_log=True)
                    bank.load_session(UpdateWorkflow.FLOW)
                    if bank.session is not None:
                        if bank.use_last_session and not bank.session.get_status(Workflow.FLOW_OVER):
                            wf_status = bank.session.get('workflow_status')
                            if wf_status is None or not wf_status:
                                banks_list.append(bank.get_bank_release_info()['info'])
                except Exception as e:
                    print(str(e))
            print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))

        if options.update:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            banks = options.bank.split(',')
            # gres turns False as soon as one bank is skipped or fails to update
            gres = True
            for bank in banks:
                options.bank = bank
                bmaj = Bank(bank, options)
                print('Log file: '+bmaj.config.log_file)
                check_status = bmaj.check()
                if not check_status:
                    print('Skip bank ' + options.bank + ': wrong config')
                    gres = False
                    continue
                res = bmaj.update(depends=True)
                if not res:
                    gres = False
                Notify.notifyBankAction(bmaj)
            if not gres:
                sys.exit(1)

        if options.freeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.freeze(options.release)
            if not res:
                sys.exit(1)

        if options.unfreeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.unfreeze(options.release)
            if not res:
                sys.exit(1)

        if options.remove or options.removeall:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if options.remove and not options.release:
                print("Bank release is missing")
                sys.exit(1)
            if options.removeall:
                bmaj = Bank(options.bank, options, no_log=True)
                print('Log file: '+bmaj.config.log_file)
                res = bmaj.removeAll(options.force)
            else:
                bmaj = Bank(options.bank, options)
                print('Log file: '+bmaj.config.log_file)
                res = bmaj.remove(options.release)
                Notify.notifyBankAction(bmaj)
            if not res:
                sys.exit(1)

        if options.removepending:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            print('Log file: '+bmaj.config.log_file)
            res = bmaj.remove_pending(options.release)
            if not res:
                sys.exit(1)

        if options.unpublish:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bmaj.unpublish()
            sys.exit(0)

        if options.publish:
            if not options.bank:
                print("Bank name or release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            print('Log file: '+bmaj.config.log_file)
            bmaj.load_session()
            bank = bmaj.bank
            session = None
            if options.get_option('release') is None:
                # Get latest prod release
                if len(bank['production'])>0:
                    prod = bank['production'][-1]
                    for s in bank['sessions']:
                        if s['id'] == prod['session']:
                            session = s
                            break
            else:
                # Search production release matching release
                for prod in bank['production']:
                    if prod['release'] == options.release or prod['prod_dir'] == options.release:
                        # Search session related to this production release
                        for s in bank['sessions']:
                            if s['id'] == prod['session']:
                                session = s
                                break
                        break
            if session is None:
                print("No production session could be found for this release")
                sys.exit(1)
            bmaj.session._session = session
            bmaj.publish()
    except Exception as e:
        # sys.exit() raises SystemExit, which is not caught here: only
        # unexpected errors land in this handler and must be reported as failure
        print(str(e))
        sys.exit(1)
예제 #15
0
class TestBiomajSetup(unittest.TestCase):
  '''
  Checks on bank creation, session handling, process factories and dependencies
  '''

  def setUp(self):
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

    # Delete all banks
    b = Bank('alu')
    b.banks.remove({})

    self.config = BiomajConfig('alu')
    self._remove_lock_file()

  def tearDown(self):
    self._remove_lock_file()
    self.utils.clean()

  def _remove_lock_file(self):
    '''
    Delete the alu.lock file from the configured data.dir if it exists
    '''
    data_dir = self.config.get('data.dir')
    lock_file = os.path.join(data_dir, 'alu.lock')
    if os.path.exists(lock_file):
      os.remove(lock_file)

  def _save_sessions(self, bank, over=False):
    '''
    Save four 'alu' sessions with FLOW_INIT set in bank, the last one stays in bank.session

    :param bank: bank receiving the sessions
    :param over: also set FLOW_OVER on each session
    '''
    for _ in range(1, 5):
      s = Session('alu', self.config, UpdateWorkflow.FLOW)
      s._session['status'][Workflow.FLOW_INIT] = True
      if over:
        s._session['status'][Workflow.FLOW_OVER] = True
      bank.session = s
      bank.save_session()

  def test_new_bank(self):
    '''
    Checks bank init
    '''
    Bank('alu')

  def test_new_session(self):
    '''
    Checks an empty session is created
    '''
    b = Bank('alu')
    b.load_session(UpdateWorkflow.FLOW)
    for key in b.session._session['status'].keys():
      self.assertFalse(b.session.get_status(key))

  def test_session_reload_notover(self):
    '''
    Checks a session is used if present
    '''
    b = Bank('alu')
    self._save_sessions(b)

    b = Bank('alu')
    b.load_session(UpdateWorkflow.FLOW)
    self.assertTrue(b.session.get_status(Workflow.FLOW_INIT))

  def test_clean_old_sessions(self):
    '''
    Checks a single session remains after an update followed by clean_old_sessions
    '''
    b = Bank('local')
    self._save_sessions(b)
    b2 = Bank('local')
    b2.update()
    b2.clean_old_sessions()
    self.assertEqual(len(b2.bank['sessions']), 1)

  def test_session_reload_over(self):
    '''
    Checks the loaded session has no FLOW_INIT status when saved sessions are flagged as over
    '''
    b = Bank('alu')
    self._save_sessions(b, over=True)

    b = Bank('alu')
    b.load_session(UpdateWorkflow.FLOW)
    self.assertFalse(b.session.get_status(Workflow.FLOW_INIT))

  def test_bank_list(self):
    '''
    Checks both created banks are listed
    '''
    # Instantiating a bank is enough, the objects themselves are not needed
    Bank('alu')
    Bank('local')
    banks = Bank.list()
    self.assertEqual(len(banks), 2)

  @attr('network')
  def test_get_release(self):
    '''
    Get release
    '''
    b = Bank('alu')
    b.load_session(UpdateWorkflow.FLOW)
    res = b.update()
    self.assertTrue(b.session.get('update'))
    self.assertTrue(res)
    self.assertIsNotNone(b.session._session['release'])

  def test_remove_session(self):
    '''
    Checks removing a session drops it from the bank sessions
    '''
    b = Bank('alu')
    self._save_sessions(b)
    self.assertEqual(len(b.bank['sessions']), 4)
    b.remove_session(b.session.get('id'))
    self.assertEqual(len(b.bank['sessions']), 3)

  @attr('process')
  def test_postprocesses_setup(self):
    b = Bank('localprocess')
    pfactory = PostProcessFactory(b)
    pfactory.run(True)
    self.assertEqual(len(pfactory.threads_tasks[0]), 2)
    self.assertEqual(len(pfactory.threads_tasks[1]), 1)

  @attr('process')
  def test_postprocesses_exec_again(self):
    '''
    Execute once, set a status to false, check that False processes are executed
    '''
    b = Bank('localprocess')
    pfactory = PostProcessFactory(b)
    pfactory.run()
    self.assertTrue(pfactory.blocks['BLOCK1']['META0']['PROC0'])
    self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC1'])
    self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC2'])
    # Work on a copy so the first factory status is left untouched
    blocks = copy.deepcopy(pfactory.blocks)
    blocks['BLOCK2']['META1']['PROC2'] = False
    pfactory2 = PostProcessFactory(b, blocks)
    pfactory2.run()
    self.assertTrue(pfactory2.blocks['BLOCK2']['META1']['PROC2'])

  @attr('process')
  def test_preprocesses(self):
    b = Bank('localprocess')
    pfactory = PreProcessFactory(b)
    pfactory.run()
    self.assertTrue(pfactory.meta_status['META0']['PROC0'])

  @attr('process')
  def test_removeprocesses(self):
    b = Bank('localprocess')
    pfactory = RemoveProcessFactory(b)
    pfactory.run()
    self.assertTrue(pfactory.meta_status['META0']['PROC0'])

  def test_dependencies_list(self):
    b = Bank('computed')
    deps = b.get_dependencies()
    self.assertEqual(len(deps), 2)
예제 #16
0
class TestBiomajFunctional(unittest.TestCase):
  '''
  Functional tests driving complete bank workflows (update, publish, remove,
  freeze, search...) against the banks declared in the test configuration.

  Every test starts with an empty bank collection and no 'local' lock file
  (see setUp).
  '''

  def setUp(self):
    '''
    Load the test configuration, delete every bank record and remove any
    leftover lock file of the 'local' bank.
    '''
    self.utils = UtilsForTest()
    BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

    # Delete all banks
    b = Bank('local')
    b.banks.remove({})

    self.config = BiomajConfig('local')
    self._remove_lock_file()

  def tearDown(self):
    '''
    Remove the 'local' lock file if a test left one behind, then clean the fixtures.
    '''
    self._remove_lock_file()
    self.utils.clean()

  def _remove_lock_file(self):
    '''
    Delete <data.dir>/local.lock when it exists.
    '''
    lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
    if os.path.exists(lock_file):
      os.remove(lock_file)

  def test_extract_release_from_file_name(self):
    '''
    With an empty release.regexp, the release is taken from the group
    captured by release.file in the file name.
    '''
    b = Bank('local')
    b.load_session(UpdateWorkflow.FLOW)
    # Raw strings: same runtime value, without invalid escape sequences
    b.session.config.set('release.file', r'test_(\d+)\.txt')
    b.session.config.set('release.regexp', '')
    w = UpdateWorkflow(b)
    w.wf_release()
    self.assertEqual(b.session.get('release'), '100')

  def test_extract_release_from_file_content(self):
    '''
    With release.regexp set, the release is extracted with that regexp
    ('103' here) instead of coming from the file name.
    '''
    b = Bank('local')
    b.load_session(UpdateWorkflow.FLOW)
    b.session.config.set('release.file', r'test_100\.txt')
    b.session.config.set('release.regexp', r'Release\s*(\d+)')
    w = UpdateWorkflow(b)
    w.wf_release()
    self.assertEqual(b.session.get('release'), '103')

  def test_publish(self):
    '''
    Update a bank, then publish it
    '''
    b = Bank('local')
    b.update()
    current_link = os.path.join(b.config.get('data.dir'),
                                b.config.get('dir.version'),
                                'current')
    # Nothing is published by a plain update
    self.assertFalse(os.path.exists(current_link))
    self.assertIsNone(b.bank['current'])
    b.publish()
    self.assertTrue(os.path.exists(current_link))
    self.assertEqual(b.bank['current'], b.session._session['id'])

  # Should test this on local downloader, changing 1 file to force update,
  # else we would get same bank and there would be no update
  def test_no_update(self):
    '''
    Try updating twice, at second time, bank should not be updated
    '''
    b = Bank('local')
    b.update()
    self.assertTrue(b.session.get('update'))
    b.update()
    self.assertFalse(b.session.get('update'))
    self.assertFalse(b.session.get_status(Workflow.FLOW_POSTPROCESS))

  @attr('release')
  def test_release_control(self):
    '''
    Try updating twice, at second time, modify one file (same date),
     bank should update
    '''
    b = Bank('local')
    b.update()
    b.session.config.set('keep.old.version', '3')
    self.assertTrue(b.session.get('update'))
    remote_file = b.session.config.get('remote.dir') + 'test2.fasta'
    # Touch the remote file so its timestamps change
    os.utime(remote_file, None)
    # Update test2.fasta and set release.control
    b.session.config.set('release.control', 'true')
    b.update()
    self.assertTrue(b.session.get('update'))
    b.update()
    self.assertFalse(b.session.get('update'))
    # Changing the list of remote files must trigger a new update
    b.session.config.set('remote.files', '^test2.fasta')
    b.update()
    self.assertTrue(b.session.get('update'))

  def test_fromscratch_update(self):
    '''
    Try updating twice, at second time, bank should be updated (force with fromscratch)
    '''
    b = Bank('local')
    b.update()
    self.assertTrue(b.session.get('update'))
    sess = b.session.get('release')
    b.options.fromscratch = True
    b.update()
    self.assertTrue(b.session.get('update'))
    # The forced release gets a '__1' suffix
    self.assertEqual(b.session.get('release'), sess+'__1')

  def test_fromscratch_update_with_release(self):
    '''
    Try updating twice, at second time, bank should be updated (force with fromscratch)

    Use case with release defined in release file
    '''
    b = Bank('local')
    b.load_session(UpdateWorkflow.FLOW)
    b.session.config.set('release.file', r'test_(\d+)\.txt')
    b.session.config.set('release.regexp', '')
    w = UpdateWorkflow(b)
    w.wf_release()
    self.assertEqual(b.session.get('release'), '100')
    # Create the release directory before computing the release a second time
    os.makedirs(b.session.get_full_release_directory())
    w = UpdateWorkflow(b)
    # Reset release
    b.session.set('release', None)
    w.options.fromscratch = True
    w.wf_release()
    self.assertEqual(b.session.get('release'), '100__1')

  def test_mix_stop_from_task(self):
    '''
    Get a first release, then fromscratch --stop-after, then restart from-task
    '''
    b = Bank('local')
    b.update()
    rel = b.session.get('release')
    b2 = Bank('local')
    b2.options.stop_after = 'download'
    b2.options.fromscratch = True
    b2.update()
    self.assertEqual(b2.session.get('release'), rel+'__1')
    b3 = Bank('local')
    res = b3.update()
    self.assertEqual(b3.session.get('release'), rel+'__1')
    self.assertTrue(res)

  def test_mix_stop_from_task2(self):
    '''
    Get a first release, then fromscratch --stop-after, then restart from-task
    '''
    b = Bank('local')
    b.update()
    rel = b.session.get('release')
    b2 = Bank('local')
    b2.options.stop_after = 'download'
    b2.options.fromscratch = True
    b2.update()
    self.assertEqual(b2.session.get('release'), rel+'__1')
    b3 = Bank('local')
    res = b3.update()
    # NOTE(review): from_task is set on b2 after b3.update() has already run,
    # so it does not influence the update above - was b3 (before update) intended?
    b2.options.from_task = 'download'
    self.assertEqual(b3.session.get('release'), rel+'__1')
    self.assertTrue(res)

  def test_mix_stop_from_task3(self):
    '''
    Get a first release, then fromscratch --stop-after, then restart from-task
    '''
    b = Bank('local')
    b.update()
    rel = b.session.get('release')
    b2 = Bank('local')
    b2.options.stop_after = 'download'
    b2.options.fromscratch = True
    b2.update()
    self.assertEqual(b2.session.get('release'), rel+'__1')
    b3 = Bank('local')
    res = b3.update()
    # NOTE(review): from_task is set on b2 after b3.update() has already run,
    # so it does not influence the update above - was b3 (before update) intended?
    b2.options.from_task = 'postprocess'
    self.assertEqual(b3.session.get('release'), rel+'__1')
    self.assertTrue(res)

  def test_mix_stop_from_task4(self):
    '''
    Get a first release, then fromscratch --stop-before download, then
    restart from the postprocess task: that last update must fail
    '''
    b = Bank('local')
    b.update()
    b2 = Bank('local')
    b2.options.stop_before = 'download'
    b2.options.fromscratch = True
    b2.update()
    b3 = Bank('local')
    b3.options.from_task = 'postprocess'
    res = b3.update()
    self.assertFalse(res)

  def test_delete_old_dirs(self):
    '''
    Try updating 3 times, oldest dir should be removed
    '''
    b = Bank('local')
    b.removeAll(True)
    b = Bank('local')
    b.update()
    self.assertTrue(b.session.get('update'))
    b.options.fromscratch = True
    b.update()
    self.assertTrue(b.session.get('update'))
    self.assertEqual(len(b.bank['production']), 2)
    b.update()
    self.assertTrue(b.session.get('update'))
    # one new dir, but olders must be deleted
    self.assertEqual(len(b.bank['production']), 2)

  def test_delete_old_dirs_with_freeze(self):
    '''
    Try updating 3 times, oldest dir should be removed but not freezed releases
    '''
    b = Bank('local')
    b.removeAll(True)
    b = Bank('local')
    b.update()
    b.freeze(b.session.get('release'))
    self.assertTrue(b.session.get('update'))
    b.options.fromscratch = True
    b.update()
    b.freeze(b.session.get('release'))
    self.assertTrue(b.session.get('update'))
    self.assertEqual(len(b.bank['production']), 2)
    b.update()
    self.assertTrue(b.session.get('update'))
    # one new dir, and the two frozen releases are expected to still be listed
    self.assertEqual(len(b.bank['production']), 3)

  def test_removeAll(self):
    '''
    removeAll must delete both the bank data directory and its database record
    '''
    b = Bank('local')
    b.update()
    b.removeAll()
    self.assertFalse(os.path.exists(b.get_data_dir()))
    bdb = b.banks.find_one({'name': b.name})
    self.assertIsNone(bdb)

  def test_remove(self):
    '''
    test removal of a production dir
    '''
    b = Bank('local')
    b.update()
    self.assertTrue(os.path.exists(b.session.get_full_release_directory()))
    self.assertEqual(len(b.bank['production']), 1)
    b.remove(b.session.get('release'))
    self.assertFalse(os.path.exists(b.session.get_full_release_directory()))
    # Reload the bank to check the stored production list
    b = Bank('local')
    self.assertEqual(len(b.bank['production']), 0)

  def test_update_stop_after(self):
    '''
    With stop_after='download', download is done but postprocess is not
    '''
    b = Bank('local')
    b.options.stop_after = 'download'
    b.update()
    self.assertTrue(b.session.get_status('download'))
    self.assertFalse(b.session.get_status('postprocess'))

  def test_update_stop_before(self):
    '''
    With stop_before='postprocess', download is done but postprocess is not
    '''
    b = Bank('local')
    b.options.stop_before = 'postprocess'
    b.update()
    self.assertTrue(b.session.get_status('download'))
    self.assertFalse(b.session.get_status('postprocess'))

  def test_reupdate_from_task(self):
    '''
    Stop an update after download, then resume the same release from
    postprocess: postprocess is done and the release directory is unchanged
    '''
    b = Bank('local')
    b.options.stop_after = 'download'
    b.update()
    self.assertFalse(b.session.get_status('postprocess'))
    b2 = Bank('local')
    b2.options.from_task = 'postprocess'
    b2.options.release = b.session.get('release')
    b2.update()
    self.assertTrue(b2.session.get_status('postprocess'))
    self.assertEqual(b.session.get_full_release_directory(), b2.session.get_full_release_directory())

  def test_reupdate_from_task_error(self):
    '''
    Stop an update after check, then try to resume from postprocess:
    the second update must fail
    '''
    b = Bank('local')
    b.options.stop_after = 'check'
    b.update()
    self.assertFalse(b.session.get_status('postprocess'))
    b2 = Bank('local')
    b2.options.from_task = 'postprocess'
    b2.options.release = b.session.get('release')
    res = b2.update()
    self.assertFalse(res)

  def test_reupdate_from_task_wrong_release(self):
    '''
    Resuming from a task with a release name that does not exist must fail
    '''
    b = Bank('local')
    b.options.stop_after = 'download'
    b.update()
    self.assertFalse(b.session.get_status('postprocess'))
    b2 = Bank('local')
    b2.options.from_task = 'postprocess'
    b2.options.release = 'wrongrelease'
    res = b2.update()
    self.assertFalse(res)

  @attr('process')
  def test_postprocesses_restart_from_proc(self):
    '''
    Restart post-processing of an existing release, first as a whole and
    then from process PROC2, checking the files the processes create
    '''
    b = Bank('localprocess')
    b.update()
    proc1file = os.path.join(b.session.get_full_release_directory(),'proc1.txt')
    proc2file = os.path.join(b.session.get_full_release_directory(),'proc2.txt')
    self.assertTrue(os.path.exists(proc1file))
    self.assertTrue(os.path.exists(proc2file))
    os.remove(proc1file)
    os.remove(proc2file)
    # Restart from postprocess, reexecute all processes
    b2 = Bank('localprocess')
    b2.options.from_task = 'postprocess'
    b2.options.release = b.session.get('release')
    b2.update()
    self.assertTrue(os.path.exists(proc1file))
    self.assertTrue(os.path.exists(proc2file))
    os.remove(proc1file)
    os.remove(proc2file)
    # Restart from postprocess, but at process PROC2 and following
    b3 = Bank('localprocess')
    b3.options.from_task = 'postprocess'
    b3.options.process = 'PROC2'
    b3.options.release = b.session.get('release')
    b3.update()
    # NOTE(review): the check that proc1.txt is NOT recreated is disabled - confirm why
    #self.assertFalse(os.path.exists(proc1file))
    self.assertTrue(os.path.exists(proc2file))

  def test_computed(self):
    '''
    Update a computed bank, then check a second update does nothing
    '''
    b = Bank('computed')
    res = b.update(True)
    self.assertTrue(res)
    self.assertTrue(os.path.exists(b.session.get_full_release_directory()+'/sub1/flat/test_100.txt'))
    self.assertTrue(b.session.get('update'))
    # Check that, with depends non updated, bank is not updated itself
    nextb = Bank('computed')
    nextb.update(True)
    self.assertFalse(nextb.session.get('update'))

  @attr('nofile')
  def test_computed_nofile(self):
    '''
    Computed bank with protocol 'none': files moved from sub1 must be
    present in the release directory
    '''
    b = Bank('computed2')
    b.load_session(UpdateWorkflow.FLOW)
    b.session.config.set('protocol', 'none')
    b.session.config.set('sub1.files.move', 'flat/test_.*')
    res = b.update(True)
    self.assertTrue(res)
    self.assertTrue(os.path.exists(b.session.get_full_release_directory()+'/sub1/flat/test_100.txt'))

  def test_computed_ref_release(self):
    '''
    The last production release of 'computed2' must equal the last
    production release of 'sub1'
    '''
    b = Bank('computed2')
    res = b.update(True)
    b2 = Bank('sub1')
    b2release = b2.bank['production'][-1]['release']
    brelease = b.bank['production'][-1]['release']
    self.assertTrue(res)
    self.assertEqual(brelease, b2release)

  # This test used to be a second definition of test_computed_ref_release,
  # which replaced the one above in the class so that it never ran.
  @attr('computed')
  def test_computed_no_second_update(self):
    '''
    Updating 'computed2' twice: the second run must not report an update
    '''
    b = Bank('computed2')
    b.update(True)
    self.assertTrue(b.session.get('update'))
    b2 = Bank('computed2')
    b2.update(True)
    self.assertFalse(b2.session.get('update'))

  def test_computederror(self):
    '''
    Update of 'computederror' must fail, with the 'sub2' dependency flagged
    true and the 'error' dependency flagged false in the session
    '''
    b = Bank('computederror')
    res = b.update(True)
    self.assertFalse(res)
    self.assertTrue(b.session._session['depends']['sub2'])
    self.assertFalse(b.session._session['depends']['error'])

  @attr('directrelease')
  def test_directhttp_release(self):
    '''
    Update the 'directhttp' bank and check the expected file is in the release
    '''
    b = Bank('directhttp')
    b.update()
    self.assertTrue(b.session.get('update'))
    self.assertTrue(os.path.exists(b.session.get_full_release_directory()+'/flat/debian/README.html'))

  @attr('network')
  def test_multi(self):
    '''
    Update the 'multi' bank and check the content of the two JSON files
    found in the release
    '''
    b = Bank('multi')
    b.update()
    with open(os.path.join(b.session.get_full_release_directory(),'flat/test1.json'), 'r') as content_file:
      my_json = json.loads(content_file.read())
      self.assertEqual(my_json['args']['key1'], 'value1')
    with open(os.path.join(b.session.get_full_release_directory(),'flat/test2.json'), 'r') as content_file:
      my_json = json.loads(content_file.read())
      self.assertEqual(my_json['form']['key1'], 'value1')

  def test_freeze(self):
    '''
    A frozen release cannot be removed; once unfrozen it can
    '''
    b = Bank('local')
    b.update()
    rel = b.session.get('release')
    b.freeze(rel)
    prod = b.get_production(rel)
    self.assertEqual(prod['freeze'], True)
    res = b.remove(rel)
    self.assertEqual(res, False)
    b.unfreeze(rel)
    prod = b.get_production(rel)
    self.assertEqual(prod['freeze'], False)
    res = b.remove(rel)
    self.assertEqual(res, True)

  def test_stats(self):
    '''
    Disk usage statistics must report a positive size for the first bank
    and for the release that was just produced
    '''
    b = Bank('local')
    b.update()
    rel = b.session.get('release')
    stats = Bank.get_banks_disk_usage()
    self.assertGreater(stats[0]['size'], 0)
    for release in stats[0]['releases']:
      if release['name'] == rel:
        self.assertGreater(release['size'], 0)

  @attr('process')
  def test_processes_meta_data(self):
    '''
    Check the formats recorded in the session after updating 'localprocess'
    '''
    b = Bank('localprocess')
    b.update()
    formats = b.session.get('formats')
    self.assertEqual(len(formats['blast']), 2)
    self.assertEqual(len(formats['test'][0]['files']), 3)

  @attr('process')
  def test_search(self):
    '''
    Search banks by format and/or type after updating 'localprocess'
    '''
    b = Bank('localprocess')
    b.update()
    search_res = Bank.search(['blast'],[])
    self.assertEqual(len(search_res), 1)
    search_res = Bank.search([],['nucleic'])
    self.assertEqual(len(search_res), 1)
    search_res = Bank.search(['blast'],['nucleic'])
    self.assertEqual(len(search_res), 1)
    search_res = Bank.search(['blast'],['proteic'])
    self.assertEqual(len(search_res), 0)

  def test_owner(self):
    '''
    test ACL with owner
    '''
    b = Bank('local')
    res = b.update()
    self.assertTrue(res)
    b.set_owner('sample')
    b2 = Bank('local')
    # self.fail() raises AssertionError, itself an Exception: it must be
    # called outside the try body or the except clause would swallow it.
    try:
      b2.update()
    except Exception:
      pass
    else:
      self.fail('not owner, should not be allowed')
예제 #17
0
class TestBiomajSetup(unittest.TestCase):
    """
    Tests for bank creation, session persistence/reload and the process
    factories, run against the banks declared in the test configuration.

    Every test starts with an empty bank collection and no 'alu' lock file
    (see setUp).
    """

    def setUp(self):
        """
        Load the test configuration, delete every bank record and remove any
        leftover lock file of the 'alu' bank.
        """
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

        # Delete all banks
        b = Bank("alu")
        b.banks.remove({})

        self.config = BiomajConfig("alu")
        self._remove_lock_file()

    def tearDown(self):
        """
        Remove the 'alu' lock file if a test left one behind, then clean the fixtures.
        """
        self._remove_lock_file()
        self.utils.clean()

    def _remove_lock_file(self):
        """
        Delete <data.dir>/alu.lock when it exists.
        """
        lock_file = os.path.join(self.config.get("data.dir"), "alu.lock")
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def _save_sessions(self, bank, count=4, over=False):
        """
        Create and save `count` 'alu' update sessions on `bank`.

        Each session has its init step flagged done; with `over` set, the
        workflow-over flag is set as well. On return, bank.session is the
        last session that was saved.
        """
        for _ in range(count):
            s = Session("alu", self.config, UpdateWorkflow.FLOW)
            s._session["status"][Workflow.FLOW_INIT] = True
            if over:
                s._session["status"][Workflow.FLOW_OVER] = True
            bank.session = s
            bank.save_session()

    def test_new_bank(self):
        """
        Checks bank init (passes as long as the constructor does not raise)
        """
        Bank("alu")

    def test_new_session(self):
        """
        Checks an empty session is created
        """
        b = Bank("alu")
        b.load_session(UpdateWorkflow.FLOW)
        for key in b.session._session["status"].keys():
            self.assertFalse(b.session.get_status(key))

    def test_session_reload_notover(self):
        """
        Checks a session is used if present
        """
        b = Bank("alu")
        self._save_sessions(b)

        b = Bank("alu")
        b.load_session(UpdateWorkflow.FLOW)
        self.assertTrue(b.session.get_status(Workflow.FLOW_INIT))

    def test_clean_old_sessions(self):
        """
        Checks only one session is left after an update and clean_old_sessions
        """
        b = Bank("local")
        # NOTE(review): sessions are built with the 'alu' name/config but saved
        # on the 'local' bank - confirm this mix is intended
        self._save_sessions(b)
        b2 = Bank("local")
        b2.update()
        b2.clean_old_sessions()
        self.assertEqual(len(b2.bank["sessions"]), 1)

    def test_session_reload_over(self):
        """
        Checks a finished (over) session is not reused: the loaded session
        has its init step not done
        """
        b = Bank("alu")
        self._save_sessions(b, over=True)

        b = Bank("alu")
        b.load_session(UpdateWorkflow.FLOW)
        self.assertFalse(b.session.get_status(Workflow.FLOW_INIT))

    def test_bank_list(self):
        """
        Checks both created banks are listed
        """
        Bank("alu")
        Bank("local")
        banks = Bank.list()
        self.assertEqual(len(banks), 2)

    @attr("network")
    def test_get_release(self):
        """
        Get release
        """
        b = Bank("alu")
        b.load_session(UpdateWorkflow.FLOW)
        res = b.update()
        self.assertTrue(b.session.get("update"))
        self.assertTrue(res)
        self.assertIsNotNone(b.session._session["release"])

    def test_remove_session(self):
        """
        Save four sessions, remove the last saved one, three must remain
        """
        b = Bank("alu")
        self._save_sessions(b)
        self.assertEqual(len(b.bank["sessions"]), 4)
        b.remove_session(b.session.get("id"))
        self.assertEqual(len(b.bank["sessions"]), 3)

    @attr("process")
    def test_postprocesses_setup(self):
        """
        Checks task distribution: two tasks in the first thread slot, one in the second
        """
        b = Bank("localprocess")
        pfactory = PostProcessFactory(b)
        # NOTE(review): run(True) presumably only prepares/simulates the run - confirm
        pfactory.run(True)
        self.assertEqual(len(pfactory.threads_tasks[0]), 2)
        self.assertEqual(len(pfactory.threads_tasks[1]), 1)

    @attr("process")
    def test_postprocesses_exec_again(self):
        """
        Execute once, set a status to false, check that False processes are executed
        """
        b = Bank("localprocess")
        pfactory = PostProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.blocks["BLOCK1"]["META0"]["PROC0"])
        self.assertTrue(pfactory.blocks["BLOCK2"]["META1"]["PROC1"])
        self.assertTrue(pfactory.blocks["BLOCK2"]["META1"]["PROC2"])
        # Deep copy so the first factory's status tree is left untouched
        blocks = copy.deepcopy(pfactory.blocks)
        blocks["BLOCK2"]["META1"]["PROC2"] = False
        pfactory2 = PostProcessFactory(b, blocks)
        pfactory2.run()
        self.assertTrue(pfactory2.blocks["BLOCK2"]["META1"]["PROC2"])

    @attr("process")
    def test_preprocesses(self):
        """
        Run the pre-processes and check META0/PROC0 is flagged done
        """
        b = Bank("localprocess")
        pfactory = PreProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status["META0"]["PROC0"])

    @attr("process")
    def test_removeprocesses(self):
        """
        Run the remove-processes and check META0/PROC0 is flagged done
        """
        b = Bank("localprocess")
        pfactory = RemoveProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status["META0"]["PROC0"])

    def test_dependencies_list(self):
        """
        The 'computed' bank must report exactly two dependencies
        """
        b = Bank("computed")
        deps = b.get_dependencies()
        self.assertEqual(len(deps), 2)
예제 #18
0
 def setUp(self):
   '''
   Create the test fixture helper, load the global configuration from its
   global_properties (user configuration disabled) and build the
   'testhttp' bank configuration.
   '''
   helper = UtilsForTest()
   self.utils = helper
   BiomajConfig.load_config(helper.global_properties,
                            allow_user_config=False)
   self.config = BiomajConfig('testhttp')
예제 #19
0
 def setUp(self):
     '''
     Create the test fixture helper, record the directory holding this test
     file and load the global configuration from the helper's
     global_properties (user configuration disabled).
     '''
     helper = UtilsForTest()
     self.utils = helper
     this_file = os.path.realpath(__file__)
     self.curdir = os.path.dirname(this_file)
     BiomajConfig.load_config(helper.global_properties, allow_user_config=False)
예제 #20
0
def main():

    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-c',
                        '--config',
                        dest="config",
                        help="Configuration file")
    parser.add_argument('--check',
                        dest="check",
                        help="Check bank property file",
                        action="store_true",
                        default=False)
    parser.add_argument('-u',
                        '--update',
                        dest="update",
                        help="Update action",
                        action="store_true",
                        default=False)
    parser.add_argument('--fromscratch',
                        dest="fromscratch",
                        help="Force a new cycle update",
                        action="store_true",
                        default=False)
    parser.add_argument('-z',
                        '--from-scratch',
                        dest="fromscratch",
                        help="Force a new cycle update",
                        action="store_true",
                        default=False)
    parser.add_argument('-p',
                        '--publish',
                        dest="publish",
                        help="Publish",
                        action="store_true",
                        default=False)
    parser.add_argument('--unpublish',
                        dest="unpublish",
                        help="Unpublish",
                        action="store_true",
                        default=False)

    parser.add_argument('--release',
                        dest="release",
                        help="release of the bank")
    parser.add_argument(
        '--from-task',
        dest="from_task",
        help="Start cycle at a specific task (init always executed)")
    parser.add_argument(
        '--process',
        dest="process",
        help=
        "Linked to from-task, optionally specify a block, meta or process name to start from"
    )
    parser.add_argument('-l', '--log', dest="log", help="log level")
    parser.add_argument('-r',
                        '--remove',
                        dest="remove",
                        help="Remove a bank release",
                        action="store_true",
                        default=False)
    parser.add_argument('--remove-all',
                        dest="removeall",
                        help="Remove all bank releases and database records",
                        action="store_true",
                        default=False)
    parser.add_argument('--remove-pending',
                        dest="removepending",
                        help="Remove pending release",
                        action="store_true",
                        default=False)
    parser.add_argument('-s',
                        '--status',
                        dest="status",
                        help="Get status",
                        action="store_true",
                        default=False)
    parser.add_argument('-b', '--bank', dest="bank", help="bank name")
    parser.add_argument('--owner',
                        dest="owner",
                        help="change owner of the bank")
    parser.add_argument('--stop-before',
                        dest="stop_before",
                        help="Store workflow before task")
    parser.add_argument('--stop-after',
                        dest="stop_after",
                        help="Store workflow after task")
    parser.add_argument('--freeze',
                        dest="freeze",
                        help="Freeze a bank release",
                        action="store_true",
                        default=False)
    parser.add_argument('--unfreeze',
                        dest="unfreeze",
                        help="Unfreeze a bank release",
                        action="store_true",
                        default=False)
    parser.add_argument('-f',
                        '--force',
                        dest="force",
                        help="Force action",
                        action="store_true",
                        default=False)
    parser.add_argument('-h',
                        '--help',
                        dest="help",
                        help="Show usage",
                        action="store_true",
                        default=False)

    parser.add_argument('--search',
                        dest="search",
                        help="Search by format and types",
                        action="store_true",
                        default=False)
    parser.add_argument('--formats',
                        dest="formats",
                        help="List of formats to search, comma separated")
    parser.add_argument('--types',
                        dest="types",
                        help="List of types to search, comma separated")
    parser.add_argument('--query',
                        dest="query",
                        help="Lucene query syntax to search in index")

    parser.add_argument('--show',
                        dest="show",
                        help="Show format files for selected bank",
                        action="store_true",
                        default=False)

    parser.add_argument('-n',
                        '--change-dbname',
                        dest="newbank",
                        help="Change old bank name to this new bank name")
    parser.add_argument(
        '-e',
        '--move-production-directories',
        dest="newdir",
        help=
        "Change bank production directories location to this new path, path must exists"
    )
    parser.add_argument('--visibility',
                        dest="visibility",
                        help="visibility status of the bank")

    parser.add_argument('--maintenance',
                        dest="maintenance",
                        help="Maintenance mode (on/off/status)")

    parser.add_argument('--version',
                        dest="version",
                        help="Show version",
                        action="store_true",
                        default=False)
    parser.add_argument('--status-ko',
                        dest="statusko",
                        help="Get bank in KO status",
                        action="store_true",
                        default=False)

    options = Options()
    parser.parse_args(namespace=options)

    options.no_log = False

    if options.help:
        print('''
    --config: global.properties file path

    --status: list of banks with published release
        [OPTIONAL]
        --bank xx / bank: Get status details of bank

    --status-ko: list of banks in error status (last run)

    --log DEBUG|INFO|WARN|ERR  [OPTIONAL]: set log level in logs for this run, default is set in global.properties file

    --check: Check bank property file
        [MANDATORY]
        --bank xx: name of the bank to check (will check xx.properties)

    --owner yy: Change owner of the bank (user id)
        [MANDATORY]
        --bank xx: name of the bank

    --visibility public|private: change visibility public/private of a bank
        [MANDATORY]
        --bank xx: name of the bank

    --change-dbname yy: Change name of the bank to this new name
        [MANDATORY]
        --bank xx: current name of the bank

    --move-production-directories yy: Change bank production directories location to this new path, path must exists
        [MANDATORY]
        --bank xx: current name of the bank

    --update: Update bank
        [MANDATORY]
        --bank xx: name of the bank(s) to update, comma separated
        [OPTIONAL]
        --publish: after update set as *current* version
        --from-scratch: force a new update cycle, even if release is identical, release will be incremented like (myrel_1)
        --stop-before xx: stop update cycle before the start of step xx
        --stop-after xx: stop update cycle after step xx has completed
        --from-task xx --release yy: Force an re-update cycle for bank release *yy* or from current cycle (in production directories), skipping steps up to *xx*
        --process xx: linked to from-task, optionally specify a block, meta or process name to start from
        --release xx: release to update

    --publish: Publish bank as current release to use
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to publish
    --unpublish: Unpublish bank (remove current)
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove-all: Remove all bank releases and database records
        [MANDATORY]
        --bank xx: name of the bank to update
        [OPTIONAL]
        --force: remove freezed releases

    --remove-pending: Remove pending releases
        [MANDATORY]
        --bank xx: name of the bank to update

    --remove: Remove bank release (files and database release)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

        Release must not be the *current* version. If this is the case, publish a new release before.

    --freeze: Freeze bank release (cannot be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --unfreeze: Unfreeze bank release (can be removed)
        [MANDATORY]
        --bank xx: name of the bank to update
        --release xx: release of the bank to remove

    --search: basic search in bank production releases, return list of banks
       --formats xx,yy : list of comma separated format
      AND/OR
       --types xx,yy : list of comma separated type

       --query "LUCENE query syntax": search in index (if activated)

    --show: Show bank files per format
      [MANDATORY]
      --bank xx: name of the bank to show
      [OPTIONAL]
      --release xx: release of the bank to show

    --maintenance on/off/status: (un)set biomaj in maintenance mode to prevent updates/removal

        ''')
        return

    if options.version:
        version = pkg_resources.require('biomaj')[0].version
        print('Version: ' + str(version))
        return

    if options.stop_after or options.stop_before or options.from_task:
        available_steps = []
        for flow in UpdateWorkflow.FLOW:
            available_steps.append(flow['name'])
        for flow in RemoveWorkflow.FLOW:
            available_steps.append(flow['name'])
        if options.stop_after:
            if options.stop_after not in available_steps:
                print('Invalid step: ' + options.stop_after)
                sys.exit(1)
        if options.stop_before:
            if options.stop_before not in available_steps:
                print('Invalid step: ' + options.stop_before)
                sys.exit(1)
        if options.from_task:
            if options.from_task not in available_steps:
                print('Invalid step: ' + options.from_task)
                sys.exit(1)

    bmaj = None
    try:
        if options.config is not None:
            BiomajConfig.load_config(options.config)
        else:
            BiomajConfig.load_config()
    except Exception as e:
        print(str(e))
        sys.exit(1)

    try:

        if options.maintenance:
            if options.maintenance not in ['on', 'off', 'status']:
                print("Wrong maintenance value [on,off,status]")
                sys.exit(1)
            data_dir = BiomajConfig.global_config.get('GENERAL', 'data.dir')
            if BiomajConfig.global_config.has_option('GENERAL', 'lock.dir'):
                lock_dir = BiomajConfig.global_config.get(
                    'GENERAL', 'lock.dir')
            else:
                lock_dir = data_dir
            maintenance_lock_file = os.path.join(lock_dir, 'biomaj.lock')
            if options.maintenance == 'status':
                if os.path.exists(maintenance_lock_file):
                    print("Maintenance: On")
                else:
                    print("Maintenance: Off")
                sys.exit(0)
            if options.maintenance == 'on':
                f = open(maintenance_lock_file, 'w')
                f.write('1')
                f.close()
                print("Maintenance set to On")
                sys.exit(0)
            if options.maintenance == 'off':
                if os.path.exists(maintenance_lock_file):
                    os.remove(maintenance_lock_file)
                print("Maintenance set to Off")
                sys.exit(0)

        if options.owner:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_owner(options.owner)
            sys.exit(0)

        if options.visibility:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if options.visibility not in ['public', 'private']:
                print("Valid values are public|private")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            bank.set_visibility(options.visibility)
            print(
                "Do not forget to update accordingly the visibility.default parameter in the configuration file"
            )
            sys.exit(0)

        if options.newdir:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            if not os.path.exists(options.newdir):
                print("Destination directory does not exists")
            bank = Bank(options.bank, options=options, no_log=True)
            if not bank.bank['production']:
                print("Nothing to move, no production directory")
                sys.exit(0)
            bank.load_session(Workflow.FLOW, None)
            w = Workflow(bank)
            res = w.wf_init()
            if not res:
                sys.exit(1)
            for prod in bank.bank['production']:
                session = bank.get_session_from_release(prod['release'])
                bank.load_session(Workflow.FLOW, session)
                prod_path = bank.session.get_full_release_directory()
                if os.path.exists(prod_path):
                    shutil.move(prod_path, options.newdir)
                prod['data_dir'] = options.newdir
            bank.banks.update(
                {'name': options.bank},
                {'$set': {
                    'production': bank.bank['production']
                }})
            print("Bank production directories moved to " + options.newdir)
            print(
                "WARNING: do not forget to update accordingly the data.dir and dir.version properties"
            )
            w.wf_over()
            sys.exit(0)

        if options.newbank:
            if not options.bank:
                print("Bank option is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            conf_dir = BiomajConfig.global_config.get('GENERAL', 'conf.dir')
            bank_prop_file = os.path.join(conf_dir,
                                          options.bank + '.properties')
            config_bank = configparser.SafeConfigParser()
            config_bank.read(
                [os.path.join(conf_dir, options.bank + '.properties')])
            config_bank.set('GENERAL', 'db.name', options.newbank)
            newbank_prop_file = open(
                os.path.join(conf_dir, options.newbank + '.properties'), 'w')
            config_bank.write(newbank_prop_file)
            newbank_prop_file.close()
            bank.banks.update({'name': options.bank},
                              {'$set': {
                                  'name': options.newbank
                              }})
            os.remove(bank_prop_file)
            print("Bank " + options.bank + " renamed to " + options.newbank)
            sys.exit(0)

        if options.search:
            if options.query:
                res = Bank.searchindex(options.query)
                print("Query matches for :" + options.query)
                results = [["Release", "Format(s)", "Type(s)", "Files"]]
                for match in res:
                    results.append([
                        match['_source']['release'],
                        str(match['_source']['format']),
                        str(match['_source']['types']),
                        ','.join(match['_source']['files'])
                    ])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
            else:
                formats = []
                if options.formats:
                    formats = options.formats.split(',')
                types = []
                if options.types:
                    types = options.types.split(',')
                print("Search by formats=" + str(formats) + ", types=" +
                      str(types))
                res = Bank.search(formats, types, False)
                results = [[
                    "Name", "Release", "Format(s)", "Type(s)", 'Published'
                ]]
                for bank in sorted(res, key=lambda bank: (bank['name'])):
                    b = bank['name']
                    bank['production'].sort(key=lambda n: n['release'],
                                            reverse=True)
                    for prod in bank['production']:
                        iscurrent = ""
                        if prod['session'] == bank['current']:
                            iscurrent = "yes"
                        results.append([
                            b if b else '', prod['release'],
                            ','.join(prod['formats']), ','.join(prod['types']),
                            iscurrent
                        ])
                print(tabulate(results, headers="firstrow", tablefmt="grid"))
                sys.exit(0)

        if options.show:
            if not options.bank:
                print("Bank option is required")
                sys.exit(1)

            bank = Bank(options.bank, no_log=True)
            results = [[
                "Name", "Release", "Format(s)", "Type(s)", "Tag(s)", "File(s)"
            ]]
            current = None
            fformat = None
            if 'current' in bank.bank and bank.bank['current']:
                current = bank.bank['current']
            for prod in bank.bank['production']:
                include = True
                release = prod['release']
                if current == prod['session']:
                    release += ' (current)'
                if options.release and (prod['release'] != options.release and
                                        prod['prod_dir'] != options.release):
                    include = False
                if include:
                    session = bank.get_session_from_release(prod['release'])
                    formats = session['formats']
                    afiles = []
                    atags = []
                    atypes = []
                    for fformat in list(formats.keys()):
                        for elt in formats[fformat]:
                            atypes.append(','.join(elt['types']))
                            for tag in list(elt['tags'].keys()):
                                atags.append(elt['tags'][tag])
                            for eltfile in elt['files']:
                                afiles.append(eltfile)
                    results.append([
                        bank.bank['name'], release, fformat, ','.join(atypes),
                        ','.join(atags), ','.join(afiles)
                    ])
            print(tabulate(results, headers="firstrow", tablefmt="grid"))
            sys.exit(0)

        if options.check:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bank = Bank(options.bank, no_log=True)
            print(options.bank + " check: " + str(bank.check()) + "\n")
            sys.exit(0)

        if options.status:
            if options.bank:
                bank = Bank(options.bank, no_log=True)
                info = bank.get_bank_release_info(full=True)
                print(
                    tabulate(info['info'], headers='firstrow',
                             tablefmt='psql'))
                print(
                    tabulate(info['prod'], headers='firstrow',
                             tablefmt='psql'))
                # do we have some pending release(s)
                if 'pend' in info and len(info['pend']) > 1:
                    print(
                        tabulate(info['pend'],
                                 headers='firstrow',
                                 tablefmt='psql'))
            else:
                banks = Bank.list()
                # Headers of output table
                banks_list = [["Name", "Type(s)", "Release", "Visibility"]]
                for bank in sorted(banks, key=lambda k: k['name']):
                    bank = Bank(bank['name'], no_log=True)
                    banks_list.append(bank.get_bank_release_info()['info'])
                print(tabulate(banks_list, headers="firstrow",
                               tablefmt="psql"))
            sys.exit(0)

        if options.statusko:
            banks = Bank.list()
            banks_list = [[
                "Name", "Type(s)", "Release", "Visibility", "Last run"
            ]]
            for bank in sorted(banks, key=lambda k: k['name']):
                try:
                    bank = Bank(bank['name'], no_log=True)
                    bank.load_session(UpdateWorkflow.FLOW)
                    if bank.session is not None:
                        if bank.use_last_session and not bank.session.get_status(
                                Workflow.FLOW_OVER):
                            wf_status = bank.session.get('workflow_status')
                            if wf_status is None or not wf_status:
                                banks_list.append(
                                    bank.get_bank_release_info()['info'])
                except Exception as e:
                    print(str(e))
            print(tabulate(banks_list, headers="firstrow", tablefmt="psql"))

        if options.update:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            banks = options.bank.split(',')
            gres = True
            for bank in banks:
                options.bank = bank
                bmaj = Bank(bank, options)
                print('Log file: ' + bmaj.config.log_file)
                check_status = bmaj.check()
                if not check_status:
                    print('Skip bank ' + options.bank + ': wrong config')
                    gres = False
                    continue
                res = bmaj.update(depends=True)
                if not res:
                    gres = False
                Notify.notifyBankAction(bmaj)
            if not gres:
                sys.exit(1)

        if options.freeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.freeze(options.release)
            if not res:
                sys.exit(1)

        if options.unfreeze:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if not options.release:
                print("Bank release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options)
            res = bmaj.unfreeze(options.release)
            if not res:
                sys.exit(1)

        if options.remove or options.removeall:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            if options.remove and not options.release:
                print("Bank release is missing")
                sys.exit(1)
            if options.removeall:
                bmaj = Bank(options.bank, options, no_log=True)
                print('Log file: ' + bmaj.config.log_file)
                res = bmaj.removeAll(options.force)
            else:
                bmaj = Bank(options.bank, options)
                print('Log file: ' + bmaj.config.log_file)
                res = bmaj.remove(options.release)
                Notify.notifyBankAction(bmaj)
            if not res:
                sys.exit(1)

        if options.removepending:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            res = bmaj.remove_pending(options.release)
            if not res:
                sys.exit(1)

        if options.unpublish:
            if not options.bank:
                print("Bank name is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bmaj.unpublish()
            sys.exit(0)

        if options.publish:
            if not options.bank:
                print("Bank name or release is missing")
                sys.exit(1)
            bmaj = Bank(options.bank, options, no_log=True)
            bmaj.load_session()
            bank = bmaj.bank
            session = None
            if options.get_option('release') is None:
                # Get latest prod release
                if len(bank['production']) > 0:
                    prod = bank['production'][len(bank['production']) - 1]
                    for s in bank['sessions']:
                        if s['id'] == prod['session']:
                            session = s
                            break
            else:
                # Search production release matching release
                for prod in bank['production']:
                    if prod['release'] == options.release or prod[
                            'prod_dir'] == options.release:
                        # Search session related to this production release
                        for s in bank['sessions']:
                            if s['id'] == prod['session']:
                                session = s
                                break
                        break
            if session is None:
                print("No production session could be found for this release")
                sys.exit(1)
            bmaj.session._session = session
            bmaj.publish()
    except Exception as e:
        print(str(e))
예제 #21
0
class TestBiomajFunctional(unittest.TestCase):
    """Functional tests that drive complete ``Bank`` operations.

    Each test instantiates banks by name ('local', 'localprocess',
    'computed', ...) and runs update / publish / remove / freeze operations,
    then asserts on the resulting session values and on the directories
    created on disk.
    """

    def _remove_lock_file(self):
        """Delete 'local.lock' from the configured ``data.dir`` if it exists."""
        lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def setUp(self):
        """Load the test configuration and start without any bank record."""
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)

        # Delete all banks
        b = Bank('local')
        b.banks.remove({})

        self.config = BiomajConfig('local')
        self._remove_lock_file()

    def tearDown(self):
        """Remove a leftover lock file and clean the test utilities."""
        self._remove_lock_file()
        self.utils.clean()

    def test_extract_release_from_file_name(self):
        """The release is captured from the release file name."""
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        # Raw strings: '\d' and '\.' are regex escapes, not string escapes.
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100')

    def test_extract_release_from_file_content(self):
        """The release is captured from the release file content."""
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_100\.txt')
        b.session.config.set('release.regexp', r'Release\s*(\d+)')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '103')

    def test_publish(self):
        """Update a bank, then publish it."""
        b = Bank('local')
        b.update()
        current_link = os.path.join(b.config.get('data.dir'),
                                    b.config.get('dir.version'), 'current')
        # Nothing is published right after the update.
        self.assertFalse(os.path.exists(current_link))
        self.assertIsNone(b.bank['current'])
        b.publish()
        self.assertTrue(os.path.exists(current_link))
        self.assertEqual(b.bank['current'], b.session._session['id'])

    # Should test this on local downloader, changing 1 file to force update,
    # else we would get same bank and there would be no update
    def test_no_update(self):
        """Try updating twice; the second time the bank should not be updated."""
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.update()
        self.assertFalse(b.session.get('update'))
        self.assertFalse(b.session.get_status(Workflow.FLOW_POSTPROCESS))

    @attr('release')
    def test_release_control(self):
        """Try updating twice; the second time one file is modified
        (same date) and the bank should update.
        """
        b = Bank('local')
        b.update()
        b.session.config.set('keep.old.version', '3')
        self.assertTrue(b.session.get('update'))
        remote_file = b.session.config.get('remote.dir') + 'test2.fasta'
        os.utime(remote_file, None)
        # Update test2.fasta and set release.control
        b.session.config.set('release.control', 'true')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.update()
        self.assertFalse(b.session.get('update'))
        b.session.config.set('remote.files', '^test2.fasta')
        b.update()
        self.assertTrue(b.session.get('update'))

    def test_fromscratch_update(self):
        """Try updating twice; the second time the bank should be updated
        (forced with fromscratch) and the release gets a '__1' suffix.
        """
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        sess = b.session.get('release')
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertEqual(b.session.get('release'), sess + '__1')

    def test_fromscratch_update_with_release(self):
        """Try updating twice; the second time the bank should be updated
        (forced with fromscratch).

        Use case with the release defined in the release file.
        """
        b = Bank('local')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('release.file', r'test_(\d+)\.txt')
        b.session.config.set('release.regexp', '')
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100')
        os.makedirs(b.session.get_full_release_directory())
        w = UpdateWorkflow(b)
        # Reset release
        b.session.set('release', None)
        w.options.fromscratch = True
        w.wf_release()
        self.assertEqual(b.session.get('release'), '100__1')

    def test_mix_stop_from_task(self):
        """Get a first release, then fromscratch --stop-after, then restart."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task2(self):
        """Get a first release, then fromscratch --stop-after, then restart."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        # NOTE(review): from_task is set on b2 *after* b3.update() has run, so
        # it does not influence anything asserted below. Possibly meant to be
        # b3.options.from_task set before the update - confirm intent.
        b2.options.from_task = 'download'
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task3(self):
        """Get a first release, then fromscratch --stop-after, then restart."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b2 = Bank('local')
        b2.options.stop_after = 'download'
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get('release'), rel + '__1')
        b3 = Bank('local')
        res = b3.update()
        # NOTE(review): same pattern as test_mix_stop_from_task2 - this
        # assignment happens after b3.update() and targets b2; confirm intent.
        b2.options.from_task = 'postprocess'
        self.assertEqual(b3.session.get('release'), rel + '__1')
        self.assertTrue(res)

    def test_mix_stop_from_task4(self):
        """Get a first release, then fromscratch --stop-before download, then
        restart from task 'postprocess': that last update must fail.
        """
        b = Bank('local')
        b.update()
        b2 = Bank('local')
        b2.options.stop_before = 'download'
        b2.options.fromscratch = True
        b2.update()
        b3 = Bank('local')
        b3.options.from_task = 'postprocess'
        res = b3.update()
        self.assertFalse(res)

    def test_delete_old_dirs(self):
        """Try updating 3 times; the oldest dir should be removed."""
        b = Bank('local')
        b.removeAll(True)
        b = Bank('local')
        b.update()
        self.assertTrue(b.session.get('update'))
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertEqual(len(b.bank['production']), 2)
        b.update()
        self.assertTrue(b.session.get('update'))
        # one new dir, but olders must be deleted
        self.assertEqual(len(b.bank['production']), 2)

    def test_delete_old_dirs_with_freeze(self):
        """Try updating 3 times; old dirs should be removed, except the
        frozen releases.
        """
        b = Bank('local')
        b.removeAll(True)
        b = Bank('local')
        b.update()
        b.freeze(b.session.get('release'))
        self.assertTrue(b.session.get('update'))
        b.options.fromscratch = True
        b.update()
        b.freeze(b.session.get('release'))
        self.assertTrue(b.session.get('update'))
        self.assertEqual(len(b.bank['production']), 2)
        b.update()
        self.assertTrue(b.session.get('update'))
        # one new dir, and the two frozen releases are expected to remain
        self.assertEqual(len(b.bank['production']), 3)

    def test_removeAll(self):
        """removeAll deletes the bank data directory and its bank record."""
        b = Bank('local')
        b.update()
        b.removeAll()
        self.assertFalse(os.path.exists(b.get_data_dir()))
        bdb = b.banks.find_one({'name': b.name})
        self.assertIsNone(bdb)

    def test_remove(self):
        """Test removal of a production dir."""
        b = Bank('local')
        b.update()
        self.assertTrue(os.path.exists(b.session.get_full_release_directory()))
        self.assertEqual(len(b.bank['production']), 1)
        b.remove(b.session.get('release'))
        self.assertFalse(os.path.exists(
            b.session.get_full_release_directory()))
        # Reload the bank to read the stored production list.
        b = Bank('local')
        self.assertEqual(len(b.bank['production']), 0)

    def test_update_stop_after(self):
        """With stop_after='download', postprocess is not reached."""
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertTrue(b.session.get_status('download'))
        self.assertFalse(b.session.get_status('postprocess'))

    def test_update_stop_before(self):
        """With stop_before='postprocess', download is done but not postprocess."""
        b = Bank('local')
        b.options.stop_before = 'postprocess'
        b.update()
        self.assertTrue(b.session.get_status('download'))
        self.assertFalse(b.session.get_status('postprocess'))

    def test_reupdate_from_task(self):
        """Stop after download, then restart the same release from postprocess."""
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.update()
        self.assertTrue(b2.session.get_status('postprocess'))
        self.assertEqual(b.session.get_full_release_directory(),
                         b2.session.get_full_release_directory())

    def test_reupdate_from_task_error(self):
        """Restarting from postprocess fails when the run stopped after 'check'."""
        b = Bank('local')
        b.options.stop_after = 'check'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        res = b2.update()
        self.assertFalse(res)

    def test_reupdate_from_task_wrong_release(self):
        """Restarting from postprocess fails for an unknown release."""
        b = Bank('local')
        b.options.stop_after = 'download'
        b.update()
        self.assertFalse(b.session.get_status('postprocess'))
        b2 = Bank('local')
        b2.options.from_task = 'postprocess'
        b2.options.release = 'wrongrelease'
        res = b2.update()
        self.assertFalse(res)

    @attr('process')
    def test_postprocesses_restart_from_proc(self):
        """Re-run post-processes, first all of them, then from PROC2 on."""
        b = Bank('localprocess')
        b.update()
        release_dir = b.session.get_full_release_directory()
        proc1file = os.path.join(release_dir, 'proc1.txt')
        proc2file = os.path.join(release_dir, 'proc2.txt')
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, reexecute all processes
        b2 = Bank('localprocess')
        b2.options.from_task = 'postprocess'
        b2.options.release = b.session.get('release')
        b2.update()
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, but at process PROC2 and following
        b3 = Bank('localprocess')
        b3.options.from_task = 'postprocess'
        b3.options.process = 'PROC2'
        b3.options.release = b.session.get('release')
        b3.update()
        # Absence of proc1.txt is deliberately not asserted here (that check
        # was disabled in this test).
        self.assertTrue(os.path.exists(proc2file))

    def test_computed(self):
        """Update a computed bank, then check a second update changes nothing."""
        b = Bank('computed')
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(
            os.path.exists(b.session.get_full_release_directory() +
                           '/sub1/flat/test_100.txt'))
        self.assertTrue(b.session.get('update'))
        # Check that, with depends non updated, bank is not updated itself
        nextb = Bank('computed')
        nextb.update(True)
        self.assertFalse(nextb.session.get('update'))

    @attr('nofile')
    def test_computed_nofile(self):
        """Update 'computed2' with protocol 'none' and a sub1 file move rule."""
        b = Bank('computed2')
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set('protocol', 'none')
        b.session.config.set('sub1.files.move', 'flat/test_.*')
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(
            os.path.exists(b.session.get_full_release_directory() +
                           '/sub1/flat/test_100.txt'))

    def test_computed_ref_release(self):
        """The last release of 'computed2' equals the last release of 'sub1'."""
        b = Bank('computed2')
        res = b.update(True)
        b2 = Bank('sub1')
        b2release = b2.bank['production'][-1]['release']
        brelease = b.bank['production'][-1]['release']
        self.assertTrue(res)
        self.assertEqual(brelease, b2release)

    # This test used to share the name ``test_computed_ref_release`` with the
    # method above; the later definition replaced the earlier one in the class
    # body, so only one of the two ever ran. It now has its own name.
    @attr('computed')
    def test_computed_no_second_update(self):
        """Updating 'computed2' twice: the second run reports no update."""
        b = Bank('computed2')
        b.update(True)
        self.assertTrue(b.session.get('update'))
        b2 = Bank('computed2')
        b2.update(True)
        self.assertFalse(b2.session.get('update'))

    def test_computederror(self):
        """The update fails and the session records per-dependency status."""
        b = Bank('computederror')
        res = b.update(True)
        self.assertFalse(res)
        self.assertTrue(b.session._session['depends']['sub2'])
        self.assertFalse(b.session._session['depends']['error'])

    @attr('directrelease')
    def test_directhttp_release(self):
        """Update the 'directhttp' bank and check the downloaded file exists."""
        b = Bank('directhttp')
        b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(
            os.path.exists(b.session.get_full_release_directory() +
                           '/flat/debian/README.html'))

    @attr('network')
    def test_multi(self):
        """Update the 'multi' bank and inspect the two JSON files it produced."""
        b = Bank('multi')
        b.update()
        release_dir = b.session.get_full_release_directory()
        with open(os.path.join(release_dir, 'flat/test1.json'),
                  'r') as content_file:
            my_json = json.loads(content_file.read())
            self.assertEqual(my_json['args']['key1'], 'value1')
        with open(os.path.join(release_dir, 'flat/test2.json'),
                  'r') as content_file:
            my_json = json.loads(content_file.read())
            self.assertEqual(my_json['form']['key1'], 'value1')

    def test_freeze(self):
        """A frozen release cannot be removed until it is unfrozen."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        b.freeze(rel)
        prod = b.get_production(rel)
        self.assertEqual(prod['freeze'], True)
        res = b.remove(rel)
        self.assertEqual(res, False)
        b.unfreeze(rel)
        prod = b.get_production(rel)
        self.assertEqual(prod['freeze'], False)
        res = b.remove(rel)
        self.assertEqual(res, True)

    def test_stats(self):
        """Disk usage statistics report a positive size after an update."""
        b = Bank('local')
        b.update()
        rel = b.session.get('release')
        stats = Bank.get_banks_disk_usage()
        self.assertGreater(stats[0]['size'], 0)
        for release in stats[0]['releases']:
            if release['name'] == rel:
                self.assertGreater(release['size'], 0)

    @attr('process')
    def test_processes_meta_data(self):
        """Post-processes populate the session 'formats' entry."""
        b = Bank('localprocess')
        b.update()
        formats = b.session.get('formats')
        self.assertEqual(len(formats['blast']), 2)
        self.assertEqual(len(formats['test'][0]['files']), 3)

    @attr('process')
    def test_search(self):
        """Search banks by format and/or type."""
        b = Bank('localprocess')
        b.update()
        search_res = Bank.search(['blast'], [])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search([], ['nucleic'])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(['blast'], ['nucleic'])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(['blast'], ['proteic'])
        self.assertEqual(len(search_res), 0)

    def test_owner(self):
        """Test ACL with owner: updating a bank owned by someone else raises."""
        b = Bank('local')
        res = b.update()
        self.assertTrue(res)
        b.set_owner('sample')
        b2 = Bank('local')
        # self.fail() must not be placed inside ``try/except Exception``:
        # AssertionError is itself an Exception and would be swallowed, making
        # the test pass unconditionally. assertRaises fails when update()
        # does not raise ('not owner, should not be allowed').
        with self.assertRaises(Exception):
            b2.update()
예제 #22
0
 def setUp(self):
     """Prepare the fixture: test utilities, global config, 'testhttp' config."""
     helper = UtilsForTest()
     self.utils = helper
     # The global configuration is loaded before the 'testhttp' config
     # object is built.
     BiomajConfig.load_config(
         helper.global_properties,
         allow_user_config=False,
     )
     self.config = BiomajConfig('testhttp')
예제 #23
0
class TestBiomajFunctional(unittest.TestCase):
    """Functional tests running bank workflows against the test banks."""

    def setUp(self):
        """Load the test configuration, delete all banks and remove any lock file."""
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

        # Delete all banks
        b = Bank("local")
        b.banks.remove({})

        self.config = BiomajConfig("local")
        data_dir = self.config.get("data.dir")
        lock_file = os.path.join(data_dir, "local.lock")
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def tearDown(self):
        """Remove the 'local' lock file if present and clean the test fixtures."""
        data_dir = self.config.get("data.dir")
        lock_file = os.path.join(data_dir, "local.lock")
        if os.path.exists(lock_file):
            os.remove(lock_file)
        self.utils.clean()

    def test_extract_release_from_file_name(self):
        """The release is captured from the file name by the release.file regexp."""
        b = Bank("local")
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set("release.file", r"test_(\d+)\.txt")
        b.session.config.set("release.regexp", "")
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get("release"), "100")

    def test_extract_release_from_file_content(self):
        """The release is captured from the file content by release.regexp."""
        b = Bank("local")
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set("release.file", r"test_100\.txt")
        b.session.config.set("release.regexp", r"Release\s*(\d+)")
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get("release"), "103")

    def test_publish(self):
        """
        Update a bank, then publish it
        """
        b = Bank("local")
        b.update()
        current_link = os.path.join(b.config.get("data.dir"), b.config.get("dir.version"), "current")
        # Nothing is published until publish() is called
        self.assertFalse(os.path.exists(current_link))
        self.assertIsNone(b.bank["current"])
        b.publish()
        self.assertTrue(os.path.exists(current_link))
        self.assertEqual(b.bank["current"], b.session._session["id"])

    # Should test this on local downloader, changing 1 file to force update,
    # else we would get same bank and there would be no update
    def test_no_update(self):
        """
        Try updating twice, at second time, bank should not be updated
        """
        b = Bank("local")
        b.update()
        self.assertTrue(b.session.get("update"))
        b.update()
        self.assertFalse(b.session.get("update"))
        self.assertFalse(b.session.get_status(Workflow.FLOW_POSTPROCESS))

    def test_fromscratch_update(self):
        """
        Try updating twice, at second time, bank should be updated
        (forced with fromscratch)
        """
        b = Bank("local")
        b.update()
        self.assertTrue(b.session.get("update"))
        sess = b.session.get("release")
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get("update"))
        self.assertEqual(b.session.get("release"), sess + "__1")

    def test_fromscratch_update_with_release(self):
        """
        Try updating twice, at second time, bank should be updated
        (forced with fromscratch)

        Use case with release defined in release file
        """
        b = Bank("local")
        b.load_session(UpdateWorkflow.FLOW)
        b.session.config.set("release.file", r"test_(\d+)\.txt")
        b.session.config.set("release.regexp", "")
        w = UpdateWorkflow(b)
        w.wf_release()
        self.assertEqual(b.session.get("release"), "100")
        # Create the release directory so the second pass sees release 100 on disk
        os.makedirs(b.session.get_full_release_directory())
        w = UpdateWorkflow(b)
        # Reset release
        b.session.set("release", None)
        w.options.fromscratch = True
        w.wf_release()
        self.assertEqual(b.session.get("release"), "100__1")

    def test_mix_stop_from_task(self):
        """
        Get a first release, then fromscratch --stop-after, then restart
        """
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b2 = Bank("local")
        b2.options.stop_after = "download"
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get("release"), rel + "__1")
        b3 = Bank("local")
        res = b3.update()
        self.assertEqual(b3.session.get("release"), rel + "__1")
        self.assertTrue(res)

    def test_mix_stop_from_task2(self):
        """
        Get a first release, then fromscratch --stop-after, then restart from-task
        """
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b2 = Bank("local")
        b2.options.stop_after = "download"
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get("release"), rel + "__1")
        b3 = Bank("local")
        res = b3.update()
        # NOTE(review): from_task is set on b2 after b3.update() has already
        # run, so it does not affect this test; possibly meant to be
        # b3.options.from_task before the update. Left unchanged, to confirm.
        b2.options.from_task = "download"
        self.assertEqual(b3.session.get("release"), rel + "__1")
        self.assertTrue(res)

    def test_mix_stop_from_task3(self):
        """
        Get a first release, then fromscratch --stop-after, then restart from-task
        """
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b2 = Bank("local")
        b2.options.stop_after = "download"
        b2.options.fromscratch = True
        b2.update()
        self.assertEqual(b2.session.get("release"), rel + "__1")
        b3 = Bank("local")
        res = b3.update()
        # NOTE(review): from_task is set on b2 after b3.update() has already
        # run, so it does not affect this test; possibly meant to be
        # b3.options.from_task before the update. Left unchanged, to confirm.
        b2.options.from_task = "postprocess"
        self.assertEqual(b3.session.get("release"), rel + "__1")
        self.assertTrue(res)

    def test_mix_stop_from_task4(self):
        """
        Get a first release, then fromscratch --stop-before download, then
        restart from postprocess: the restart is expected to fail
        """
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b2 = Bank("local")
        b2.options.stop_before = "download"
        b2.options.fromscratch = True
        b2.update()
        b3 = Bank("local")
        b3.options.from_task = "postprocess"
        res = b3.update()
        self.assertFalse(res)

    def test_delete_old_dirs(self):
        """
        Try updating 3 times, oldest dir should be removed
        """
        b = Bank("local")
        b.removeAll(True)
        b = Bank("local")
        b.update()
        self.assertTrue(b.session.get("update"))
        b.options.fromscratch = True
        b.update()
        self.assertTrue(b.session.get("update"))
        self.assertEqual(len(b.bank["production"]), 2)
        b.update()
        self.assertTrue(b.session.get("update"))
        # one new dir, but older ones must be deleted
        self.assertEqual(len(b.bank["production"]), 2)

    def test_delete_old_dirs_with_freeze(self):
        """
        Try updating 3 times, oldest dir should be removed but not frozen releases
        """
        b = Bank("local")
        b.removeAll(True)
        b = Bank("local")
        b.update()
        b.freeze(b.session.get("release"))
        self.assertTrue(b.session.get("update"))
        b.options.fromscratch = True
        b.update()
        b.freeze(b.session.get("release"))
        self.assertTrue(b.session.get("update"))
        self.assertEqual(len(b.bank["production"]), 2)
        b.update()
        self.assertTrue(b.session.get("update"))
        # one new dir, and the two frozen releases must be kept
        self.assertEqual(len(b.bank["production"]), 3)

    def test_removeAll(self):
        """removeAll() deletes the bank data directory and its database record."""
        b = Bank("local")
        b.update()
        b.removeAll()
        self.assertFalse(os.path.exists(b.get_data_dir()))
        bdb = b.banks.find_one({"name": b.name})
        self.assertIsNone(bdb)

    def test_remove(self):
        """
        Test removal of a production dir
        """
        b = Bank("local")
        b.update()
        self.assertTrue(os.path.exists(b.session.get_full_release_directory()))
        self.assertEqual(len(b.bank["production"]), 1)
        b.remove(b.session.get("release"))
        self.assertFalse(os.path.exists(b.session.get_full_release_directory()))
        # Reload the bank to check the stored production list
        b = Bank("local")
        self.assertEqual(len(b.bank["production"]), 0)

    def test_update_stop_after(self):
        """With stop_after=download, download is done but postprocess is not."""
        b = Bank("local")
        b.options.stop_after = "download"
        b.update()
        self.assertTrue(b.session.get_status("download"))
        self.assertFalse(b.session.get_status("postprocess"))

    def test_update_stop_before(self):
        """With stop_before=postprocess, download is done but postprocess is not."""
        b = Bank("local")
        b.options.stop_before = "postprocess"
        b.update()
        self.assertTrue(b.session.get_status("download"))
        self.assertFalse(b.session.get_status("postprocess"))

    def test_reupdate_from_task(self):
        """Restarting from postprocess on a given release reuses its directory."""
        b = Bank("local")
        b.options.stop_after = "download"
        b.update()
        self.assertFalse(b.session.get_status("postprocess"))
        b2 = Bank("local")
        b2.options.from_task = "postprocess"
        b2.options.release = b.session.get("release")
        b2.update()
        self.assertTrue(b2.session.get_status("postprocess"))
        self.assertEqual(b.session.get_full_release_directory(), b2.session.get_full_release_directory())

    def test_reupdate_from_task_error(self):
        """Restarting from postprocess after stopping at 'check' is expected to fail."""
        b = Bank("local")
        b.options.stop_after = "check"
        b.update()
        self.assertFalse(b.session.get_status("postprocess"))
        b2 = Bank("local")
        b2.options.from_task = "postprocess"
        b2.options.release = b.session.get("release")
        res = b2.update()
        self.assertFalse(res)

    def test_reupdate_from_task_wrong_release(self):
        """Restarting from a task with an unknown release is expected to fail."""
        b = Bank("local")
        b.options.stop_after = "download"
        b.update()
        self.assertFalse(b.session.get_status("postprocess"))
        b2 = Bank("local")
        b2.options.from_task = "postprocess"
        b2.options.release = "wrongrelease"
        res = b2.update()
        self.assertFalse(res)

    @attr("process")
    def test_postprocesses_restart_from_proc(self):
        """Restart post-processing as a whole, then from process PROC2 only."""
        b = Bank("localprocess")
        b.update()
        proc1file = os.path.join(b.session.get_full_release_directory(), "proc1.txt")
        proc2file = os.path.join(b.session.get_full_release_directory(), "proc2.txt")
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, reexecute all processes
        b2 = Bank("localprocess")
        b2.options.from_task = "postprocess"
        b2.options.release = b.session.get("release")
        b2.update()
        self.assertTrue(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))
        os.remove(proc1file)
        os.remove(proc2file)
        # Restart from postprocess, but at process PROC2 and following
        b3 = Bank("localprocess")
        b3.options.from_task = "postprocess"
        b3.options.process = "PROC2"
        b3.options.release = b.session.get("release")
        b3.update()
        # self.assertFalse(os.path.exists(proc1file))
        self.assertTrue(os.path.exists(proc2file))

    def test_computed(self):
        """Updating the 'computed' bank makes sub1 files available in its release dir."""
        b = Bank("computed")
        res = b.update(True)
        self.assertTrue(res)
        self.assertTrue(os.path.exists(b.session.get_full_release_directory() + "/sub1/flat/test_100.txt"))

    def test_computed_ref_release(self):
        """The latest 'computed2' production release matches the latest 'sub1' release."""
        b = Bank("computed2")
        res = b.update(True)
        b2 = Bank("sub1")
        b2release = b2.bank["production"][-1]["release"]
        brelease = b.bank["production"][-1]["release"]
        self.assertTrue(res)
        self.assertEqual(brelease, b2release)

    def test_computederror(self):
        """A failing dependency makes the update fail and is recorded in 'depends'."""
        b = Bank("computederror")
        res = b.update(True)
        self.assertFalse(res)
        self.assertTrue(b.session._session["depends"]["sub2"])
        self.assertFalse(b.session._session["depends"]["error"])

    @attr("network")
    def test_multi(self):
        """Update the 'multi' bank and check the content of both downloaded JSON files."""
        b = Bank("multi")
        b.update()
        with open(os.path.join(b.session.get_full_release_directory(), "flat/test1.json"), "r") as content_file:
            content = content_file.read()
            my_json = json.loads(content)
            self.assertEqual(my_json["args"]["key1"], "value1")
        with open(os.path.join(b.session.get_full_release_directory(), "flat/test2.json"), "r") as content_file:
            content = content_file.read()
            my_json = json.loads(content)
            self.assertEqual(my_json["form"]["key1"], "value1")

    def test_freeze(self):
        """A frozen release cannot be removed until it is unfrozen."""
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        b.freeze(rel)
        prod = b.get_production(rel)
        self.assertTrue(prod["freeze"] == True)
        res = b.remove(rel)
        self.assertTrue(res == False)
        b.unfreeze(rel)
        prod = b.get_production(rel)
        self.assertTrue(prod["freeze"] == False)
        res = b.remove(rel)
        self.assertTrue(res == True)

    def test_stats(self):
        """Disk usage is non-zero for the first bank entry and the updated release."""
        b = Bank("local")
        b.update()
        rel = b.session.get("release")
        stats = Bank.get_banks_disk_usage()
        self.assertGreater(stats[0]["size"], 0)
        for release in stats[0]["releases"]:
            if release["name"] == rel:
                self.assertGreater(release["size"], 0)

    @attr("process")
    def test_processes_meta_data(self):
        """The session 'formats' hold two blast entries and three 'test' files."""
        b = Bank("localprocess")
        b.update()
        formats = b.session.get("formats")
        self.assertEqual(len(formats["blast"]), 2)
        self.assertEqual(len(formats["test"][0]["files"]), 3)

    @attr("process")
    def test_search(self):
        """Bank.search() result counts for several format/type combinations."""
        b = Bank("localprocess")
        b.update()
        search_res = Bank.search(["blast"], [])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search([], ["nucleic"])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(["blast"], ["nucleic"])
        self.assertEqual(len(search_res), 1)
        search_res = Bank.search(["blast"], ["proteic"])
        self.assertEqual(len(search_res), 0)

    def test_owner(self):
        """
        Test ACL with owner: after the bank owner is set to 'sample',
        a new update of the same bank is expected to raise.
        """
        b = Bank("local")
        res = b.update()
        self.assertTrue(res)
        b.set_owner("sample")
        b2 = Bank("local")
        # The previous try / self.fail() / except Exception: pass form could
        # never fail: self.fail() raises AssertionError, which is itself an
        # Exception and was swallowed by the handler.
        with self.assertRaises(Exception):
            b2.update()
예제 #24
0
def load_config(request):
  """Load the BioMAJ global configuration if it has not been loaded yet.

  The properties file path is read from the ``global_properties`` entry of
  ``request.registry.settings``.
  """
  if BiomajConfig.global_config is not None:
    # Already loaded, nothing to do
    return
  properties_path = request.registry.settings['global_properties']
  BiomajConfig.load_config(properties_path)
예제 #25
0
def main(global_config, **settings):
    """ This function returns a Pyramid WSGI application.

    :param global_config: not used by this function
    :param settings: application settings; ``global_properties`` gives the
                     path of the BioMAJ global properties file and defaults
                     to ``/etc/biomaj/global.properties``
    :return: the WSGI application built by ``Configurator.make_wsgi_app()``

    Exits the process with status 1 when the properties file does not exist.
    """
    global_properties = settings.get(
        'global_properties', '/etc/biomaj/global.properties')
    if not os.path.exists(global_properties):
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print('global.properties configuration field is not set')
        sys.exit(1)

    BiomajConfig.load_config(global_properties)

    # Store the resolved path so views can read it from the registry settings
    settings['global_properties'] = global_properties

    config = Configurator(settings=settings)
    config.include('pyramid_chameleon')

    config.add_subscriber(before_render, BeforeRender)

    # NOTE(review): the auth ticket secret is hard-coded; it should be read
    # from deployment settings instead of living in source control.
    authentication_policy = AuthTktAuthenticationPolicy(
        'seekrit', callback=None, hashalg='sha512')
    authorization_policy = ACLAuthorizationPolicy()

    config.set_authentication_policy(authentication_policy)
    config.set_authorization_policy(authorization_policy)

    config.add_static_view('static', 'static', cache_max_age=3600)
    config.add_static_view('app', 'biomajwatcher:webapp/app')
    config.add_route('home', '/')

    config.add_route('user', '/user')
    config.add_route('user_banks', '/user/{id}/banks')

    # Route registration order is kept as-is
    config.add_route('bank', '/bank')
    config.add_route('bankdetails', '/bank/{id}')
    config.add_route('banklocked', '/bank/{id}/locked')
    config.add_route('bankstatus', '/bank/{id}/status')
    config.add_route('bankconfig', '/bank/{id}/config')
    config.add_route('bankreleaseremove', '/bank/{id}/{release}')
    config.add_route('sessionlog', '/bank/{id}/log/{session}')

    config.add_route('schedulebank', '/schedule')
    config.add_route('updateschedulebank', '/schedule/{name}')

    config.add_route('search', '/search')

    config.add_route('search_format', '/search/format/{format}')
    config.add_route('search_format_type', '/search/format/{format}/type/{type}')
    config.add_route('search_type', '/search/type/{type}')

    config.add_route('stat', '/stat')

    config.add_route('is_auth', '/auth')
    config.add_route('auth', '/auth/{id}')
    config.add_route('logout', '/logout')

    config.add_route('old_api', 'BmajWatcher/GET')

    config.scan()

    # automatically serialize bson ObjectId and datetime to Mongo extended JSON
    json_renderer = JSON()

    def pymongo_adapter(obj, request):
        return json_util.default(obj)

    json_renderer.add_adapter(ObjectId, pymongo_adapter)
    json_renderer.add_adapter(datetime.datetime, pymongo_adapter)

    config.add_renderer('json', json_renderer)

    return config.make_wsgi_app()
예제 #26
0
def _read_old_properties(path):
    """Return the ``key=value`` pairs of the properties file at *path* as a dict."""
    properties = {}
    with open(path, 'r') as handle:
        for line in handle:
            # Split on the first '=' only, so values that contain '='
            # (passwords, URLs with parameters) are kept whole.
            key, separator, value = line.partition('=')
            if separator:
                properties[key.strip()] = value.strip()
    return properties


def main():
    """
    Migrate an old BioMAJ installation to the BioMAJ3 configuration layout.

    Reads the command line, rewrites each old bank ``*.properties`` file into
    the new ``conf.dir`` (with a ``[GENERAL]`` section and ``%(var)s`` variable
    syntax), instantiates the matching banks, then reads the bank names from
    the old MySQL database and calls ``migrate_bank`` on each of them.

    Exits with status 1 when the old and new configurations share a
    directory or when the data directories differ.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--config',
                        dest="config",
                        help="Biomaj3 Configuration file")
    parser.add_argument('-o',
                        '--oldconfig',
                        dest="oldconfig",
                        help="Old configuration file")
    parser.add_argument('-u',
                        '--user',
                        dest="user",
                        help="MySQL user to override global properties")
    parser.add_argument('-p',
                        '--password',
                        dest="password",
                        help="MySQL password to override global properties")
    parser.add_argument('-l',
                        '--host',
                        dest="host",
                        help="MySQL host to override global properties")
    parser.add_argument('-d',
                        '--database',
                        dest="database",
                        help="MySQL database to override global properties")
    parser.add_argument('-H',
                        '--keep_history',
                        dest="history",
                        action="store_true",
                        default=False,
                        help="Keep bank history, not only production")

    args = parser.parse_args()

    # Both paths are dereferenced below; fail with a usage message instead of
    # a TypeError traceback when one of them is missing.
    if not args.config or not args.oldconfig:
        parser.error("both --config and --oldconfig are required")

    biomajconfig = _read_old_properties(args.oldconfig)

    BiomajConfig.load_config(args.config, allow_user_config=False)
    db_properties_dir = os.path.dirname(args.oldconfig)
    if db_properties_dir == os.path.dirname(args.config):
        logging.error(
            "Bank properties use the same directory, please use a different conf.dir"
        )
        sys.exit(1)

    data_dir = biomajconfig['data.dir']
    if data_dir.endswith('/'):
        data_dir = data_dir[:-1]

    if not os.path.dirname(data_dir) == os.path.dirname(
            BiomajConfig.global_config.get('GENERAL', 'data.dir')):
        logging.error('Data dirs are different, please use the same data dirs')
        sys.exit(1)

    # Collect every bank properties file of the old installation
    prop_files = []
    for root, dirnames, filenames in os.walk(db_properties_dir):
        for filename in fnmatch.filter(filenames, '*.properties'):
            if filename != 'global.properties':
                prop_files.append(os.path.join(root, filename))

    conf_dir = BiomajConfig.global_config.get('GENERAL', 'conf.dir')
    if not os.path.exists(conf_dir):
        os.makedirs(conf_dir)

    # Old-style variable reference ${xx}; compiled once for all files
    var_pattern = re.compile(r"\$\{([a-zA-Z0-9-_.]+)\}")

    for prop_file in prop_files:
        propbankconfig = _read_old_properties(prop_file)

        newpropfile = os.path.join(conf_dir, os.path.basename(prop_file))
        with open(newpropfile, 'w') as newprop:
            newprop.write("[GENERAL]\n")
            with open(prop_file, 'r') as props:
                for line in props:
                    if not line.startswith(('*', '/*')):
                        # Replace config variables with new syntax
                        # ${xx} => %(xx)s, not other env variables
                        for match in var_pattern.findall(line):
                            if match in biomajconfig or match in propbankconfig:
                                line = line.replace('${' + match + '}',
                                                    '%(' + match + ')s')
                    newprop.write(
                        line.replace('\\\\', '\\').replace('db.source', 'depends'))
        # Instantiating the bank is done for its side effect: the constructor
        # creates the bank record when it does not exist yet.
        Bank(os.path.basename(prop_file).replace('.properties', ''),
             no_log=True)

    # database.url=jdbc\:mysql\://genobdd.genouest.org/biomaj_log
    vals = biomajconfig['database.url'].split('/')
    urllen = len(vals)
    db_name = vals[urllen - 1]
    if args.database:
        db_name = args.database
    db_host = vals[urllen - 2]
    if args.host:
        db_host = args.host
    db_user = biomajconfig['database.login']
    if args.user:
        db_user = args.user
    db_password = biomajconfig['database.password']
    if args.password:
        db_password = args.password

    # cnx stays None when connect() itself fails, so the finally clause does
    # not raise UnboundLocalError and hide the real connection error.
    cnx = None
    try:
        cnx = mysql.connector.connect(host=db_host,
                                      database=db_name,
                                      user=db_user,
                                      password=db_password)
        cur = cnx.cursor()
        cur.execute("SELECT name FROM bank")
        for row in cur.fetchall():
            migrate_bank(cur, row[0], history=args.history)
    except mysql.connector.Error as error:
        if error.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Wrong username or password: %s" % error.msg)
        elif error.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist: %s" % error.msg)
        else:
            print("Unknown error: %s" % error)
    finally:
        if cnx is not None:
            cnx.close()
예제 #27
0
class TestElastic(unittest.TestCase):
    '''
    test indexing and search
    '''

    def setUp(self):
        '''Load the test configuration, delete all banks and remove any lock file.'''
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)

        # Delete all banks
        b = Bank('local')
        b.banks.remove({})

        self.config = BiomajConfig('local')
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def tearDown(self):
        '''Remove the lock file, clean the fixtures and drop the 'test' bank index.'''
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
        self.utils.clean()
        BmajIndex.delete_all_bank('test')

    def test_index(self):
        '''Index one production record for bank 'test' and search it back.'''
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {
                "fasta": [
                    {
                        "files": [
                            "fasta/chr1.fa",
                            "fasta/chr2.fa"
                        ],
                        "types": [
                            "nucleic"
                        ],
                        "tags": {
                            "organism": "hg19"
                        }
                    }
                ],
                "blast": [
                    {
                        "files": [
                            "blast/chr1/chr1db"
                        ],
                        "types": [
                            "nucleic"
                        ],
                        "tags": {
                            "chr": "chr1",
                            "organism": "hg19"
                        }
                    }
                ]
            },
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": [
                "nucleic"
            ]
        }
        BmajIndex.add('test', prod, True)
        query = {
            'query': {
                'match': {'bank': 'test'}
            }
        }
        res = BmajIndex.search(query)
        # Two hits expected; presumably one per format entry (fasta, blast)
        # -- TODO confirm against BmajIndex.add
        self.assertEqual(len(res), 2)
예제 #28
0
class TestBiomajSetup(unittest.TestCase):
    '''
    Tests of bank creation, session handling and process factories
    '''

    def setUp(self):
        '''Load the test configuration, delete all banks and remove any lock file.'''
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)

        # Delete all banks
        b = Bank('alu')
        b.banks.remove({})

        self.config = BiomajConfig('alu')
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)

    def tearDown(self):
        '''Remove the 'alu' lock file if present and clean the test fixtures.'''
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'alu.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
        self.utils.clean()

    def test_new_bank(self):
        '''
        Checks bank init
        '''
        # Passes as long as the constructor does not raise
        Bank('alu')

    def test_new_session(self):
        '''
        Checks an empty session is created
        '''
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        for key in b.session._session['status'].keys():
            self.assertFalse(b.session.get_status(key))

    def test_session_reload_notover(self):
        '''
        Checks a saved session that is not over is loaded again
        '''
        b = Bank('alu')
        for _ in range(4):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()

        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertTrue(b.session.get_status(Workflow.FLOW_INIT))

    def test_clean_old_sessions(self):
        '''
        Checks only one session is left after an update followed by
        clean_old_sessions
        '''
        b = Bank('local')
        for _ in range(4):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        b2 = Bank('local')
        b2.update()
        b2.clean_old_sessions()
        self.assertEqual(len(b2.bank['sessions']), 1)

    def test_session_reload_over(self):
        '''
        Checks a saved session that is over is not loaded again
        '''
        b = Bank('alu')
        for _ in range(4):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            s._session['status'][Workflow.FLOW_OVER] = True
            b.session = s
            b.save_session()

        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        self.assertFalse(b.session.get_status(Workflow.FLOW_INIT))

    def test_bank_list(self):
        '''
        Checks Bank.list() returns the two banks that were instantiated
        '''
        # Instantiating a bank is what makes it appear in the list
        Bank('alu')
        Bank('local')
        banks = Bank.list()
        self.assertEqual(len(banks), 2)

    @attr('network')
    def test_get_release(self):
        '''
        Get release
        '''
        b = Bank('alu')
        b.load_session(UpdateWorkflow.FLOW)
        res = b.update()
        self.assertTrue(b.session.get('update'))
        self.assertTrue(res)
        self.assertIsNotNone(b.session._session['release'])

    def test_remove_session(self):
        '''
        Checks removing a session drops exactly one of the saved sessions
        '''
        b = Bank('alu')
        for _ in range(4):
            s = Session('alu', self.config, UpdateWorkflow.FLOW)
            s._session['status'][Workflow.FLOW_INIT] = True
            b.session = s
            b.save_session()
        self.assertEqual(len(b.bank['sessions']), 4)
        b.remove_session(b.session.get('id'))
        self.assertEqual(len(b.bank['sessions']), 3)

    @attr('process')
    def test_postprocesses_setup(self):
        '''
        Checks how post-processes are split into thread tasks
        '''
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run(True)
        self.assertEqual(len(pfactory.threads_tasks[0]), 2)
        self.assertEqual(len(pfactory.threads_tasks[1]), 1)

    @attr('process')
    def test_postprocesses_exec_again(self):
        '''
        Execute once, set a status to false, check that False processes are executed
        '''
        b = Bank('localprocess')
        pfactory = PostProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.blocks['BLOCK1']['META0']['PROC0'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC1'])
        self.assertTrue(pfactory.blocks['BLOCK2']['META1']['PROC2'])
        # Work on a copy so the first factory's results are left untouched
        blocks = copy.deepcopy(pfactory.blocks)
        blocks['BLOCK2']['META1']['PROC2'] = False
        pfactory2 = PostProcessFactory(b, blocks)
        pfactory2.run()
        self.assertTrue(pfactory2.blocks['BLOCK2']['META1']['PROC2'])

    @attr('process')
    def test_preprocesses(self):
        '''
        Checks the pre-process factory reports PROC0 of META0 as successful
        '''
        b = Bank('localprocess')
        pfactory = PreProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    @attr('process')
    def test_removeprocesses(self):
        '''
        Checks the remove-process factory reports PROC0 of META0 as successful
        '''
        b = Bank('localprocess')
        pfactory = RemoveProcessFactory(b)
        pfactory.run()
        self.assertTrue(pfactory.meta_status['META0']['PROC0'])

    def test_dependencies_list(self):
        '''
        Checks the 'computed' bank reports two dependencies
        '''
        b = Bank('computed')
        deps = b.get_dependencies()
        self.assertEqual(len(deps), 2)
예제 #29
0
class Bank(object):
    '''
    BioMAJ bank
    '''
    def __init__(self, name, options=None, no_log=False):
        '''
        Load the bank record named *name* from the connector, creating it if absent

        :param name: name of the bank, must match its config file
        :type name: str
        :param options: bank options, a fresh Options() is used when None
        :type options: argparse
        :param no_log: when truthy, the flag is copied onto the options passed to BiomajConfig
        :type no_log: bool
        :raises Exception: if the global configuration has not been loaded
        '''
        logging.debug('Initialize ' + name)
        if BiomajConfig.global_config is None:
            raise Exception('Configuration must be loaded first')

        self.name = name
        self.depends = []
        self.no_log = no_log

        # The no_log flag travels to BiomajConfig through the options object
        if no_log:
            if options is None:
                options = Options()
                options.no_log = True
            else:
                options.no_log = no_log

        self.config = BiomajConfig(self.name, options)

        nb_threads = self.config.get('bank.num.threads')
        if nb_threads is not None:
            # Class-level setting, shared by every process factory
            ProcessFactory.NB_THREAD = int(nb_threads)

        log_file = self.config.log_file
        if log_file is not None and log_file != 'none':
            logging.info("Log file: " + log_file)

        self.options = Options() if options is None else options

        # The connector stands in for the former MongoConnector.banks collection
        self.connector = Connector().get_connector()
        self.banks = self.connector
        self.bank = self.connector.get({'name': self.name})

        if self.bank is None:
            # Unknown bank: build a new record and keep the id returned by the connector
            self.bank = {
                'name': self.name,
                'current': None,
                'sessions': [],
                'production': [],
                'properties': self.get_properties()
            }
            self.bank['_id'] = self.connector.set('banks', self.bank)

        self.session = None
        self.use_last_session = False

    def check(self):
        '''
        Checks bank configuration
        '''
        return self.config.check()

    def is_locked(self):
        '''
        Checks if bank is locked ie action is in progress
        '''
        data_dir = self.config.get('data.dir')
        lock_dir = self.config.get('lock.dir', default=data_dir)
        lock_file = os.path.join(lock_dir, self.name + '.lock')
        if os.path.exists(lock_file):
            return True
        else:
            return False

    def get_bank(self):
        '''
        Return the bank record held by this object

        NOTE: this class defines get_bank a second time further down with a
        different signature; that later definition replaces this one.

        :return: bank json object
        '''
        stored_bank = self.bank
        return stored_bank

    @staticmethod
    def get_banks_disk_usage():
        '''
        Compute the disk usage of every bank, in total and per production release

        :return: list of dicts with keys name, size (sum of release sizes)
                 and releases (list of {'name', 'size'})
        '''
        if MongoConnector.db is None:
            MongoConnector(
                BiomajConfig.global_config.get('GENERAL', 'db.url'),
                BiomajConfig.global_config.get('GENERAL', 'db.name'))

        usage = []
        # Only the name and production fields are requested from the database
        for doc in MongoConnector.banks.find({}, {'name': 1, 'production': 1}):
            bank_name = doc['name']
            total_size = 0
            releases = []
            for prod in doc['production']:
                # A release without a recorded size counts as 0
                if prod['size'] is None:
                    prod['size'] = 0
                total_size += prod['size']
                releases.append({'name': prod['release'], 'size': prod['size']})
            usage.append({
                'name': bank_name,
                'size': total_size,
                'releases': releases
            })
        return usage

    def get_bank_release_info(self, full=False):
        '''
        Get release info for the bank. Used with --status option from biomaj-cly.py
        :param full: Display full for the bank
        :type full: Boolean
        :return: Dict with keys
                      if full=True
                           - info, prod, pend
                      else
                           - info
        '''

        _bank = self.bank
        info = {}

        if full:
            bank_info = []
            prod_info = []
            pend_info = []
            release = None
            if 'current' in _bank and _bank['current']:
                for prod in _bank['production']:
                    if _bank['current'] == prod['session']:
                        release = prod['release']
            # Bank info header
            bank_info.append(
                ["Name", "Type(s)", "Last update status", "Published release"])
            bank_info.append([
                _bank['name'],
                str(','.join(_bank['properties']['type'])),
                str(
                    datetime.fromtimestamp(
                        _bank['last_update_session']).strftime(
                            "%Y-%m-%d %H:%M:%S")),
                str(release)
            ])
            # Bank production info header
            prod_info.append([
                "Session", "Remote release", "Release", "Directory", "Freeze"
            ])
            for prod in _bank['production']:
                data_dir = self.config.get('data.dir')
                dir_version = self.config.get('dir.version')
                if 'data.dir' in prod:
                    data_dir = prod['data.dir']
                if 'dir.version' in prod:
                    dir_version = prod['dir.version']
                release_dir = os.path.join(data_dir, dir_version,
                                           prod['prod_dir'])
                date = datetime.fromtimestamp(
                    prod['session']).strftime('%Y-%m-%d %H:%M:%S')
                prod_info.append([
                    date, prod['remoterelease'], prod['release'], release_dir,
                    'yes' if 'freeze' in prod and prod['freeze'] else 'no'
                ])
            # Bank pending info header
            if 'pending' in _bank and len(_bank['pending'].keys()) > 0:
                pend_info.append(["Pending release", "Last run"])
                for pending in _bank['pending'].keys():
                    run = datetime.fromtimestamp(
                        _bank['pending'][pending]).strftime(
                            '%Y-%m-%d %H:%M:%S')
                    pend_info.append([pending, run])

            info['info'] = bank_info
            info['prod'] = prod_info
            info['pend'] = pend_info
            return info

        else:
            release = 'N/A'
            if 'current' in _bank and _bank['current']:
                for prod in _bank['production']:
                    if _bank['current'] == prod['session']:
                        release = prod['remoterelease']
            info['info'] = [
                _bank['name'], ','.join(_bank['properties']['type']),
                str(release), _bank['properties']['visibility']
            ]
            return info

    def update_dependencies(self):
        '''
        Update bank dependencies

        :return: status of updates
        '''
        self.depends = []
        if self.run_depends:
            depends = self.get_dependencies()
        else:
            depends = []

        self.session.set('depends', {})
        res = True
        for dep in depends:
            self.session._session['depends'][dep] = False
        for dep in depends:
            if self.session._session['depends'][dep]:
                logging.debug('Update:Depends:' + dep + ':SKIP')
                # Bank has been marked as depends multiple times, run only once
                continue
            logging.info('Update:Depends:' + dep)
            b = Bank(dep)
            res = b.update()
            self.depends.append(b)
            self.session._session['depends'][dep] = res
            logging.info('Update:Depends:' + dep + ':' + str(res))
            if not res:
                break
        return res

    def get_bank(self, bank, no_log=False):
        '''
        Gets an other bank
        '''
        return Bank(bank, no_log=no_log)

    def get_dependencies(self, bank=None):
        '''
        Search all bank dependencies

        :return: list of bank names to update
        '''
        if bank is None:
            deps = self.config.get('depends')
        else:
            deps = bank.config.get('depends')
        if deps is None:
            return []
        # Mainn deps
        deps = deps.split(',')
        # Now search in deps if they themselves depend on other banks
        for dep in deps:
            b = Bank(dep, no_log=True)
            deps = b.get_dependencies() + deps
        return deps

    def is_owner(self):
        '''
        Checks if current user is owner or admin
        '''
        admin_config = self.config.get('admin')
        admin = []
        if admin_config is not None:
            admin = [x.strip() for x in admin_config.split(',')]
        if admin and os.environ['LOGNAME'] in admin:
            return True
        if os.environ['LOGNAME'] == self.bank['properties']['owner']:
            return True
        return False

    def set_owner(self, owner):
        '''
        Update bank owner, only if current owner
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        #self.banks.update({'name': self.name}, {'$set': {'properties.owner': owner}})
        self.banks.update({'name': self.name},
                          {'$set': {
                              'properties.owner': owner
                          }})

    def set_visibility(self, visibility):
        '''
        Update bank visibility, only if current owner
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        self.banks.update({'name': self.name},
                          {'$set': {
                              'properties': {
                                  'visibility': visibility
                              }
                          }})

    def get_properties(self):
        '''
        Read bank properties from config file

        :return: properties dict
        '''

        owner = os.environ['LOGNAME']
        # If owner not set, use current user, else keep current
        if self.bank and 'properties' in self.bank and 'owner' in self.bank[
                'properties']:
            owner = self.bank['properties']['owner']

        props = {
            'visibility': self.config.get('visibility.default'),
            'type': self.config.get('db.type').split(','),
            'tags': [],
            'owner': owner
        }

        return props

    @staticmethod
    def searchindex(query):
        '''
        Forward *query* to BmajIndex.searchq and return its result
        '''
        hits = BmajIndex.searchq(query)
        return hits

    @staticmethod
    def search(formats=None, types=None, with_sessions=True):
        '''
        Search all bank releases matching some formats and types

        A production release is kept when it has at least one of *formats*
        and at least one of *types*; an empty criterion always matches. Types
        are looked up in the release and in the bank properties.

        :param formats: formats to look for
        :type formats: list
        :param types: types to look for
        :type types: list
        :param with_sessions: when False, sessions are excluded from the query result
        :type with_sessions: bool
        :return: list of bank documents that keep at least one production release
        '''
        formats = [] if formats is None else formats
        types = [] if types is None else types

        if MongoConnector.db is None:
            MongoConnector(
                BiomajConfig.global_config.get('GENERAL', 'db.url'),
                BiomajConfig.global_config.get('GENERAL', 'db.name'))

        searchfilter = {}
        if formats:
            searchfilter['production.formats'] = {'$in': formats}
        if with_sessions:
            cursor = MongoConnector.banks.find(searchfilter)
        else:
            cursor = MongoConnector.banks.find(searchfilter, {'sessions': 0})

        # Now search in which production release formats and types apply
        matching_banks = []
        for doc in cursor:
            rejected = []
            for prod in doc['production']:
                format_ok = not formats or any(
                    fmt in prod['formats'] for fmt in formats)
                type_ok = not types
                # Types are only inspected once the format criterion is met
                if format_ok and not type_ok:
                    type_ok = any(
                        t in prod['types'] or t in doc['properties']['type']
                        for t in types)
                if not (format_ok and type_ok):
                    rejected.append(prod)
            for prod in rejected:
                doc['production'].remove(prod)
            if doc['production']:
                matching_banks.append(doc)
        return matching_banks

    @staticmethod
    def list(with_sessions=False):
        '''
        Return every bank document stored in the database

        :param with_sessions: should sessions be returned or not (can be quite big)
        :type with_sessions: bool
        :return: list of bank documents as returned by the database query
        '''
        if MongoConnector.db is None:
            MongoConnector(
                BiomajConfig.global_config.get('GENERAL', 'db.url'),
                BiomajConfig.global_config.get('GENERAL', 'db.name'))

        # Without sessions, a projection excludes the 'sessions' field
        find_args = ({},) if with_sessions else ({}, {'sessions': 0})
        return [doc for doc in MongoConnector.banks.find(*find_args)]

    def controls(self):
        '''
        Initial controls (create directories etc...)
        '''
        data_dir = self.config.get('data.dir')
        bank_dir = self.config.get('dir.version')
        bank_dir = os.path.join(data_dir, bank_dir)
        if not os.path.exists(bank_dir):
            os.makedirs(bank_dir)

        offline_dir = self.config.get('offline.dir.name')
        offline_dir = os.path.join(data_dir, offline_dir)
        if not os.path.exists(offline_dir):
            os.makedirs(offline_dir)

        log_dir = self.config.get('log.dir')
        log_dir = os.path.join(log_dir, self.name)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

    def _delete(self):
        '''
        Delete bank from database, not files
        '''
        self.banks.remove({'_id': self.bank['_id']})

    def save_session(self):
        '''
        Save session in database

        The lists of downloaded and local files are written as JSON in the
        cache directory (files_<id> and local_files_<id>) and emptied in the
        session before it is pushed to the bank record. When an update
        session has reached FLOW_OVER with its 'update' flag set, a
        production entry is recorded for the release. The local bank record
        is reloaded from the database at the end.
        '''
        self.session._session['last_update_time'] = time.time()
        self.session._session['log_file'] = self.config.log_file
        if self.use_last_session:
            # Remove last session
            self.banks.update(
                {'name': self.name},
                {'$pull': {
                    'sessions': {
                        'id': self.session._session['id']
                    }
                }})
        # Insert session
        # NOTE(review): 'action' stays unbound for any other session action - confirm
        # that 'update' and 'remove' are the only possible values
        if self.session.get('action') == 'update':
            action = 'last_update_session'
        if self.session.get('action') == 'remove':
            action = 'last_remove_session'

        cache_dir = self.config.get('cache.dir')
        download_files = self.session.get('download_files')
        if download_files is not None:
            downloaded_path = os.path.join(
                cache_dir, 'files_' + str(self.session.get('id')))
            # 'with' closes the file even if the write fails
            with open(downloaded_path, 'w') as f_downloaded_files:
                f_downloaded_files.write(json.dumps(download_files))
            self.session.set('download_files', [])

        local_files = self.session.get('files')
        if local_files is not None:
            local_path = os.path.join(
                cache_dir, 'local_files_' + str(self.session.get('id')))
            with open(local_path, 'w') as f_local_files:
                # Dump the local files here, not the downloaded ones
                f_local_files.write(json.dumps(local_files))
            self.session.set('files', [])

        self.banks.update({'name': self.name}, {
            '$set': {
                action: self.session._session['id'],
                'properties': self.get_properties()
            },
            '$push': {
                'sessions': self.session._session
            }
        })
        BmajIndex.add(self.name, self.session._session)
        if self.session.get(
                'action') == 'update' and not self.session.get_status(
                    Workflow.FLOW_OVER) and self.session.get('release'):
            # Unfinished update with a known release: remember it as pending
            self.banks.update({'name': self.name}, {
                '$set': {
                    'pending.' + self.session.get('release'):
                    self.session._session['id']
                }
            })
        if self.session.get('action') == 'update' and self.session.get_status(
                Workflow.FLOW_OVER) and self.session.get('update'):
            # We expect that a production release has reached the FLOW_OVER status.
            # If no update is needed (same release etc...), the *update* session of the session is set to False
            logging.debug('Bank:Save:' + self.name)
            if len(self.bank['production']) > 0:
                # Remove from database any production entry of the same release
                self.banks.update({'name': self.name}, {
                    '$pull': {
                        'production': {
                            'release': self.session._session['release']
                        }
                    }
                })
            release_types = []
            if self.config.get('db.type'):
                release_types = self.config.get('db.type').split(',')
            release_formats = list(self.session._session['formats'].keys())
            if self.config.get('db.formats'):
                config_formats = self.config.get('db.formats').split(',')
                for config_format in config_formats:
                    if config_format not in release_formats:
                        release_formats.append(config_format)

            # Add the types declared on the release files to the configured ones
            for release_format in self.session._session['formats']:
                for release_files in self.session._session['formats'][
                        release_format]:
                    if release_files['types']:
                        for rtype in release_files['types']:
                            if rtype not in release_types:
                                release_types.append(rtype)
            prod_dir = self.session.get_release_directory()
            if self.session.get('prod_dir'):
                prod_dir = self.session.get('prod_dir')
            production = {
                'release': self.session.get('release'),
                'remoterelease': self.session.get('remoterelease'),
                'session': self.session._session['id'],
                'formats': release_formats,
                'types': release_types,
                'size': self.session.get('fullsize'),
                'data_dir': self.session._session['data_dir'],
                'dir_version': self.session._session['dir_version'],
                'prod_dir': prod_dir,
                'freeze': False
            }
            self.bank['production'].append(production)

            # Record the production entry; the release is no longer pending
            self.banks.update({'name': self.name}, {
                '$push': {
                    'production': production
                },
                '$unset': {
                    'pending.' + self.session.get('release'): ''
                }
            })

        self.bank = self.banks.find_one({'name': self.name})

    def clean_old_sessions(self):
        '''
        Delete old sessions, not latest ones nor related to production sessions
        '''
        if self.session is None:
            return
        # No previous session
        if 'sessions' not in self.bank:
            return
        if self.config.get_bool('keep.old.sessions'):
            logging.debug('keep old sessions, skipping...')
            return
        # 'last_update_session' in self.bank and self.bank['last_update_session']
        old_sessions = []
        prod_releases = []
        for session in self.bank['sessions']:
            if session['id'] == self.session.get('id'):
                # Current session
                prod_releases.append(session['release'])
                continue
            if session['id'] == self.session.get('last_update_session'):
                prod_releases.append(session['release'])
                continue
            if session['id'] == self.session.get('last_remove_session'):
                continue
            is_prod_session = False
            for prod in self.bank['production']:
                if session['id'] == prod['session']:
                    is_prod_session = True
                    break
            if is_prod_session:
                prod_releases.append(session['release'])
                continue
            old_sessions.append(session)
        if len(old_sessions) > 0:
            for session in old_sessions:
                session_id = session['id']
                self.banks.update({'name': self.name},
                                  {'$pull': {
                                      'sessions': {
                                          'id': session_id
                                      }
                                  }})
                # Check if in pending sessions
                for rel in list(self.bank['pending'].keys()):
                    rel_session = self.bank['pending'][rel]
                    if rel_session == session_id:
                        self.banks.update({'name': self.name}, {
                            '$unset': {
                                'pending': {
                                    str(session['release']): ""
                                }
                            }
                        })
                if session['release'] not in prod_releases and session[
                        'release'] != self.session.get('release'):
                    # There might be unfinished releases linked to session, delete them
                    # if they are not related to a production directory or latest run
                    session_dir = os.path.join(
                        self.config.get('data.dir'),
                        self.config.get('dir.version'), self.name +
                        self.config.get('release.separator', default='_') +
                        str(session['release']))
                    if os.path.exists(session_dir):
                        logging.info(
                            'Bank:DeleteOldSessionDir:' + self.name +
                            self.config.get('release.separator', default='_') +
                            str(session['release']))
                        shutil.rmtree(session_dir)
            self.bank = self.banks.find_one({'name': self.name})

    def publish(self):
        '''
        Set session release to *current*
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        current_link = os.path.join(self.config.get('data.dir'),
                                    self.config.get('dir.version'), 'current')
        prod_dir = self.session.get_full_release_directory()

        to_dir = os.path.join(self.config.get('data.dir'),
                              self.config.get('dir.version'))

        if os.path.lexists(current_link):
            os.remove(current_link)
        os.chdir(to_dir)
        os.symlink(self.session.get_release_directory(), 'current')
        self.bank['current'] = self.session._session['id']
        self.banks.update({'name': self.name},
                          {'$set': {
                              'current': self.session._session['id']
                          }})

    def unpublish(self):
        '''
        Unset *current*
        '''
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        current_link = os.path.join(self.config.get('data.dir'),
                                    self.config.get('dir.version'), 'current')

        if os.path.lexists(current_link):
            os.remove(current_link)
        self.banks.update({'name': self.name}, {'$set': {'current': None}})

    def get_production(self, release):
        '''
        Get production field for release

        :param release: release name or production dir name
        :type release: str
        :return: production field
        '''
        release = str(release)
        production = None
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                production = prod
        return production

    def freeze(self, release):
        '''
        Freeze a production release

        When freezed, a production release cannot be removed (manually or automatically)

        :param release: release name or production dir name
        :type release: str
        :return: bool
        '''
        release = str(release)
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        rel = None
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                # Search session related to this production release
                rel = prod['release']
        if rel is None:
            logging.error('Release not found: ' + release)
        self.banks.update({
            'name': self.name,
            'production.release': rel
        }, {'$set': {
            'production.$.freeze': True
        }})
        self.bank = self.banks.find_one({'name': self.name})
        return True

    def unfreeze(self, release):
        '''
        Unfreeze a production release to allow removal

        :param release: release name or production dir name
        :type release: str
        :return: bool
        '''
        release = str(release)
        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        rel = None
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                # Search session related to this production release
                rel = prod['release']
        if rel is None:
            logging.error('Release not found: ' + release)
        self.banks.update({
            'name': self.name,
            'production.release': rel
        }, {'$set': {
            'production.$.freeze': False
        }})
        self.bank = self.banks.find_one({'name': self.name})
        return True

    def get_new_session(self, flow=None):
        '''
        Create a new session for this bank

        :param flow: kind of workflow, Workflow.FLOW when None
        :type flow: :func:`biomaj.workflow.Workflow.FLOW`
        :return: a new Session built from the bank name, its config and the flow
        '''
        selected_flow = Workflow.FLOW if flow is None else flow
        return Session(self.name, self.config, selected_flow)

    def get_session_from_release(self, release):
        '''
        Loads the session matching a specific release

        :param release: release name oe production dir
        :type release: str
        :return: :class:`biomaj.session.Session`
        '''
        release = str(release)
        oldsession = None
        # Search production release matching release
        for prod in self.bank['production']:
            if prod['release'] == release or prod['prod_dir'] == release:
                # Search session related to this production release
                for s in self.bank['sessions']:
                    if s['id'] == prod['session']:
                        oldsession = s
                        break
                break
        if oldsession is None:
            # No prod session, try to find a session for this release, session may have failed or be stopped
            for s in self.bank['sessions']:
                if s['release'] and release.endswith(s['release']):
                    oldsession = s
        if oldsession is None:
            logging.error(
                'No production session could be found for this release')
        return oldsession

    def load_session(self, flow=None, session=None):
        '''
        Set self.session to a given, a previous or a new session

        A *session* passed by the caller is always loaded. Otherwise a new
        session is started, and for update workflows the last update session
        is loaded into it. If that session is over and no FROM_TASK option
        is set, a new session replaces it, remembering the previous remote
        release.

        :param flow: kind of workflow, Workflow.FLOW when None
        :type flow: :func:`biomaj.workflow.Workflow.FLOW`
        :param session: stored session record to load
        '''
        if flow is None:
            flow = Workflow.FLOW

        if session is not None:
            logging.debug('Load specified session ' + str(session['id']))
            self.session = Session(self.name, self.config, flow)
            self.session.load(session)
            self.use_last_session = True
            return

        start_fresh = len(self.bank['sessions']) == 0 or \
            self.options.get_option(Options.FROMSCRATCH)
        self.session = Session(self.name, self.config, flow)
        if start_fresh:
            logging.debug('Start new session')
            return

        # Load previous session for updates only
        if self.session.get('action') != 'update':
            return
        if 'last_update_session' not in self.bank or \
                not self.bank['last_update_session']:
            return

        last_id = self.bank['last_update_session']
        previous = next(
            (stored for stored in self.bank['sessions']
             if stored['id'] == last_id),
            None)
        if previous is None:
            return

        self.session.load(previous)
        if self.session.get_status(Workflow.FLOW_OVER) and \
                self.options.get_option(Options.FROM_TASK) is None:
            # Previous update is complete: start again, keeping track of its release
            previous_release = self.session.get('remoterelease')
            self.session = Session(self.name, self.config, flow)
            self.session.set('previous_release', previous_release)
            logging.debug('Start new session')
        else:
            logging.debug('Load previous session ' +
                          str(self.session.get('id')))
            self.use_last_session = True

    def remove_session(self, sid):
        '''
        Delete a session from db

        Removes the cached file listings of the session, pulls the session
        from the production list (and unsets its pending release when the
        release is known), then either flags the session as deleted or pulls
        it from the bank record depending on ``keep.old.sessions``.

        :param sid: id of the session
        :type sid: long
        :return: bool
        '''
        stored_bank = self.banks.find_one({'name': self.name})
        session_release = None
        # No early exit: when several sessions share the id, the last one wins
        for stored_session in stored_bank['sessions']:
            if stored_session['id'] == sid:
                session_release = stored_session['release']

        # Drop the per-session file listings kept in the cache directory
        cache_dir = self.config.get('cache.dir')
        for prefix in ('files_', 'local_files_'):
            listing = os.path.join(cache_dir, prefix + str(sid))
            if os.path.exists(listing):
                os.remove(listing)

        keep_sessions = self.config.get_bool('keep.old.sessions')
        if keep_sessions:
            logging.debug('keep old sessions')
            pull_spec = {'production': {'session': sid}}
        else:
            pull_spec = {
                'sessions': {'id': sid},
                'production': {'session': sid},
            }

        changes = {'$pull': pull_spec}
        if session_release is not None:
            changes['$unset'] = {'pending.' + session_release: ''}
        self.banks.update({'name': self.name}, changes)

        if keep_sessions:
            # Session stays in the record, only stamped with a deletion time
            self.banks.update(
                {'name': self.name, 'sessions.id': sid},
                {'$set': {'sessions.$.deleted': time.time()}})

        # Update object
        self.bank = self.banks.find_one({'name': self.name})
        if session_release is not None:
            BmajIndex.remove(self.name, session_release)
        return True

    def get_data_dir(self):
        '''
        Returns bank data directory

        The path is the configured ``dir.version`` joined under ``data.dir``.

        :return: str
        '''
        data_root = self.config.get('data.dir')
        version_dir = self.config.get('dir.version')
        return os.path.join(data_root, version_dir)

    def removeAll(self, force=False):
        '''
        Remove all bank releases and database records

        Deletes the bank record, its index entries, the bank data directory
        and the offline directory. The bank log directory is only deleted
        when the bank was created with ``no_log`` set.

        :param force: force removal even if some production dirs are freezed
        :type force: bool
        :return: bool, False when removal is refused because of a freezed
                 production directory, True otherwise
        '''
        if not force:
            has_freeze = any('freeze' in prod and prod['freeze']
                             for prod in self.bank['production'])
            if has_freeze:
                logging.error(
                    'Cannot remove bank, some production directories are freezed, use force if needed'
                )
                return False

        self.banks.remove({'name': self.name})
        BmajIndex.delete_all_bank(self.name)
        bank_data_dir = self.get_data_dir()
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling
        logging.warning('DELETE ' + bank_data_dir)
        if os.path.exists(bank_data_dir):
            shutil.rmtree(bank_data_dir)
        bank_offline_dir = os.path.join(self.config.get('data.dir'),
                                        self.config.get('offline.dir.name'))
        if os.path.exists(bank_offline_dir):
            shutil.rmtree(bank_offline_dir)
        bank_log_dir = os.path.join(self.config.get('log.dir'), self.name)
        if os.path.exists(bank_log_dir) and self.no_log:
            shutil.rmtree(bank_log_dir)
        return True

    def get_status(self):
        '''
        Get status of current workflow

        A bank record without a ``status`` entry (for example a record that
        was just created) is treated like a record whose status is None.

        :return: dict of current workflow status, empty dict if none is set
        '''
        status = self.bank.get('status')
        if status is None:
            return {}
        return status

    def remove_pending(self, release):
        '''
        Remove pending releases

        Every release recorded in the bank ``pending`` mapping is removed
        (release directory on disk and related session), then the mapping is
        reset. The ``release`` argument is kept for interface compatibility
        but is not used to filter which pending releases get removed.

        :param release: release or release directory
        :type release: str
        :return: bool
        :raises Exception: if the current user does not own the bank
        '''
        logging.warning('Bank:' + self.name + ':RemovePending')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        # A bank record may have no 'pending' entry at all: nothing to do
        pendings = self.bank.get('pending')
        if not pendings:
            return True

        # Iterate over a copy of the keys, with a name that does not shadow
        # the ``release`` parameter
        for pending_release in list(pendings.keys()):
            pending_session_id = pendings[pending_release]
            pending_session = None
            # self.bank is reloaded by remove_session on each iteration
            for s in self.bank['sessions']:
                if s['id'] == pending_session_id:
                    pending_session = s
                    break
            session = Session(self.name, self.config, RemoveWorkflow.FLOW)
            if pending_session is None:
                # No stored session: only the release name is known
                session._session['release'] = pending_release
            else:
                session.load(pending_session)
            release_dir = session.get_full_release_directory()
            if os.path.exists(release_dir):
                logging.debug("Remove:Pending:Dir:" + release_dir)
                shutil.rmtree(release_dir)
            self.remove_session(pending_session_id)
        self.banks.update({'name': self.name}, {'$set': {'pending': {}}})
        return True

    def remove(self, release):
        '''
        Remove a release (db and files)

        The release is looked up in the production list by release name or
        production directory. Removal is refused for a freezed release, for
        a release without a matching session and for the release currently
        set as ``current`` in the bank.

        :param release: release or release directory
        :type release: str
        :return: bool
        :raises Exception: if the current user does not own the bank
        '''
        release = str(release)
        logging.warning('Bank:' + self.name + ':Remove')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        self.session = self.get_new_session(RemoveWorkflow.FLOW)

        # Only the first production entry matching the release is considered
        matching_prod = next(
            (entry for entry in self.bank['production']
             if entry['release'] == release or entry['prod_dir'] == release),
            None)

        oldsession = None
        if matching_prod is not None:
            if 'freeze' in matching_prod and matching_prod['freeze']:
                logging.error(
                    'Cannot remove release, release is freezed, unfreeze it first'
                )
                return False
            # Session that produced this production release, if still stored
            oldsession = next(
                (stored for stored in self.bank['sessions']
                 if stored['id'] == matching_prod['session']),
                None)

        if oldsession is None:
            logging.error(
                'No production session could be found for this release')
            return False

        is_current = ('current' in self.bank
                      and self.bank['current'] == oldsession['id'])
        if is_current:
            logging.error(
                'This release is the release in the main release production, you should first unpublish it'
            )
            return False

        # Fresh session describing the removal of the old one
        removal = Session(self.name, self.config, RemoveWorkflow.FLOW)
        removal.set('action', 'remove')
        removal.set('release', oldsession['release'])
        removal.set('update_session_id', oldsession['id'])
        self.session = removal

        res = self.start_remove(removal)
        self.session.set('workflow_status', res)
        self.save_session()
        return res

    def update(self, depends=False):
        '''
        Launch a bank update

        Loads the session of the release given in the options (or the latest
        one), optionally resets the status of the tasks from the ``from_task``
        option onward, then runs the update workflow and saves the session.

        :param depends: run update of bank dependencies first
        :type depends: bool
        :return: bool
        :raises Exception: if the current user does not own the bank
        '''
        logging.warning('Bank:' + self.name + ':Update')

        if not self.is_owner():
            logging.error('Not authorized, bank owned by ' +
                          self.bank['properties']['owner'])
            raise Exception('Not authorized, bank owned by ' +
                            self.bank['properties']['owner'])

        self.run_depends = depends

        self.controls()

        requested_release = self.options.get_option('release')
        if not requested_release:
            logging.info('Bank:' + self.name + ':Release:latest')
            self.load_session(UpdateWorkflow.FLOW)
        else:
            logging.info('Bank:' + self.name + ':Release:' +
                         requested_release)
            release_session = self.get_session_from_release(requested_release)
            # No session in prod
            if release_session is None:
                logging.error('Release does not exists: ' + requested_release)
                return False
            self.load_session(UpdateWorkflow.FLOW, release_session)

        # if from task, reset workflow status in session.
        start_task = self.options.get_option('from_task')
        if start_task:
            process_tasks = (
                Workflow.FLOW_POSTPROCESS,
                Workflow.FLOW_PREPROCESS,
                Workflow.FLOW_REMOVEPROCESS,
            )
            restarting = False
            for task in self.session.flow:
                task_name = task['name']
                # A failed task located before the restart point blocks the
                # restart
                if (not restarting
                        and not self.session.get_status(task_name)
                        and task_name != start_task):
                    logging.error(
                        'Previous task ' + task_name +
                        ' was not successful, cannot restart after this task')
                    return False
                if task_name == start_task:
                    restarting = True
                if not restarting:
                    continue
                # From the restart task onward, tasks are marked False so
                # that they are run again
                self.session.set_status(task_name, False)
                if task_name in process_tasks:
                    self.session.reset_proc(
                        task_name, self.options.get_option('process'))

        self.session.set('action', 'update')
        res = self.start_update()
        self.session.set('workflow_status', res)
        self.save_session()
        return res

    def start_remove(self, session):
        '''
        Start a removal workflow

        Builds a RemoveWorkflow for this bank and the given session and
        returns the result of its ``start()`` call.

        :param session: Session to remove
        :type session: :class:`biomaj.session.Session`
        :return: bool
        '''
        return RemoveWorkflow(self, session).start()

    def start_update(self):
        '''
        Start an update workflow

        :return: result of the ``start()`` call of an UpdateWorkflow built
                 for this bank
        '''
        return UpdateWorkflow(self).start()
예제 #30
0
database into a PostgreSQL database using the JSONB data type
"""
from __future__ import print_function
from pymongo import MongoClient
import psycopg2
from psycopg2 import OperationalError, DatabaseError, IntegrityError
import json
from biomaj.config import BiomajConfig
import os
import sys

__author__ = 'tuco'

if __name__ == '__main__':

    # Read the MongoDB location from the BioMAJ global configuration
    BiomajConfig.load_config()
    mongo_url = BiomajConfig.global_config.get('GENERAL', 'db.url')
    mongo_db = BiomajConfig.global_config.get('GENERAL', 'db.name')
    mc = MongoClient(mongo_url)
    m_bank = mc[mongo_db].banks
    banks = []
    dbname = 'biomaj'

    # One single-quoted JSON literal per bank document (without its _id).
    # Single quotes inside the JSON are doubled, which is how a quote is
    # written inside a standard SQL string constant; without it any
    # apostrophe in the bank data would end the literal early and break
    # (or alter) the statement.
    rows = [
        "('%s')" % json.dumps(bank).replace("'", "''")
        for bank in m_bank.find({}, {'_id': 0})
    ]
    insert_query = "INSERT INTO bank(data) VALUES " + ",".join(rows)

    # An optional first command line argument overrides the database name
    if len(sys.argv) > 1:
        dbname = sys.argv[1]
예제 #31
0
 def setUp(self):
   """
   Create the test utilities, record the directory of this file and load
   the configuration from the utilities' global properties, passing
   allow_user_config=False.
   """
   self.utils = UtilsForTest()
   this_file = os.path.realpath(__file__)
   self.curdir = os.path.dirname(this_file)
   BiomajConfig.load_config(
       self.utils.global_properties,
       allow_user_config=False)
예제 #32
0
    def __init__(self, name, options=None, no_log=False):
        '''
        Get a bank from db or creates a new one

        :param name: name of the bank, must match its config file
        :type name: str
        :param options: bank options
        :type options: argparse
        :param no_log: when set, the flag is copied to ``options.no_log``
                       before the bank configuration is built
        :type no_log: bool
        :raises Exception: if the global configuration is not loaded yet
        '''
        logging.debug('Initialize ' + name)
        if BiomajConfig.global_config is None:
            raise Exception('Configuration must be loaded first')

        self.name = name
        self.depends = []
        self.no_log = no_log

        # Carry the flag through the options handed to the configuration,
        # creating an options object when the caller gave none
        if no_log and options is None:
            options = Options()
            options.no_log = True
        elif no_log:
            options.no_log = no_log

        self.config = BiomajConfig(self.name, options)

        thread_count = self.config.get('bank.num.threads')
        if thread_count is not None:
            ProcessFactory.NB_THREAD = int(thread_count)

        log_file = self.config.log_file
        if log_file is not None and log_file != 'none':
            logging.info("Log file: " + log_file)

        self.options = Options() if options is None else options

        # The connector object is also exposed as ``self.banks``
        self.connector = Connector().get_connector()
        self.banks = self.connector
        self.bank = self.connector.get({'name': self.name})

        if self.bank is None:
            # Unknown bank: build an empty record and store it
            new_record = {
                'name': self.name,
                'current': None,
                'sessions': [],
                'production': [],
                'properties': self.get_properties()
            }
            self.bank = new_record
            self.bank['_id'] = self.connector.set('banks', self.bank)

        self.session = None
        self.use_last_session = False
예제 #33
0
import bcrypt

from biomaj.config import BiomajConfig

# Command line: --config is required, --user is required, --pwd and --email
# are optional.
parser = argparse.ArgumentParser(description='Initialize database content.')
parser.add_argument('--config')
parser.add_argument('--user')
parser.add_argument('--pwd')
parser.add_argument('--email')
args = parser.parse_args()

if not args.config:
    # Parenthesized single-argument print works on Python 2 and Python 3
    print("config argument is missing")
    sys.exit(2)

BiomajConfig.load_config(args.config)

# NOTE(review): imported after load_config, presumably because biomaj.user
# needs the configuration to be loaded first -- confirm before moving
from biomaj.user import BmajUser
from hashlib import sha1

if not args.user:
    print('user parameter is missing')
    sys.exit(1)

rootuser = BmajUser(args.user)

if args.pwd:
    pwd = args.pwd
else:
    # No password given: derive one from a random number. The bound is an
    # int (randint rejects float bounds on recent Python 3) and the text is
    # encoded because sha1 requires bytes on Python 3.
    # NOTE(review): randint is not a cryptographically secure source;
    # consider os.urandom / the secrets module for generated passwords.
    pwd = sha1(("%s" % randint(1, 10 ** 99)).encode('utf-8')).hexdigest()