Example #1
0
    def removeAll(self, force=False):
        '''
        Remove all bank releases and database records

        Unless *force* is set, nothing is deleted when at least one entry of
        ``self.bank['production']`` carries a truthy ``freeze`` flag.
        Otherwise the bank document is removed from the database, its index
        entries are deleted and the data, offline and log directories are
        removed from disk when they exist.

        :param force: force removal even if some production dirs are frozen
        :type force: bool
        :return: bool, False when the removal is refused because of a frozen
                 production directory, True once the removal has been done
        '''
        if not force and any(prod.get('freeze') for prod in self.bank['production']):
            logging.error('Cannot remove bank, some production directories are freezed, use force if needed')
            return False

        # Database record and index entries go first, then the files on disk.
        self.banks.remove({'name': self.name})
        BmajIndex.delete_all_bank(self.name)

        bank_data_dir = self.get_data_dir()
        # logging.warn is a deprecated alias, logging.warning is the supported name
        logging.warning('DELETE ' + bank_data_dir)
        if os.path.exists(bank_data_dir):
            shutil.rmtree(bank_data_dir)

        bank_offline_dir = os.path.join(self.config.get('data.dir'), self.config.get('offline.dir.name'))
        if os.path.exists(bank_offline_dir):
            shutil.rmtree(bank_offline_dir)

        bank_log_dir = os.path.join(self.config.get('log.dir'), self.name)
        # The log directory is only dropped when self.no_log is set.
        # NOTE(review): the meaning of no_log is not visible here - confirm
        if os.path.exists(bank_log_dir) and self.no_log:
            shutil.rmtree(bank_log_dir)
        return True
Example #2
0
    def load_config(config_file=None, allow_user_config=True):
        '''
        Loads general config

        When *config_file* is not given, the path found in the BIOMAJ_CONF
        environment variable is used if it exists, else ``global.properties``
        in the current directory. The absolute path is stored in
        ``BiomajConfig.config_file`` and the parsed file in
        ``BiomajConfig.global_config``. When allowed and present,
        ``~/.biomaj.cfg`` is parsed into ``BiomajConfig.user_config``.

        If the GENERAL section sets ``use_elastic`` to "1", ``BmajIndex.load``
        is called with the configured ``elastic_nodes`` (default
        ``localhost``) and ``elastic_index`` (default ``biomaj``).

        :param config_file: global.properties file path
        :type config_file: str
        :param allow_user_config: use ~/.biomaj.cfg if present
        :type allow_user_config: bool
        :raises Exception: if no global configuration file can be found
        '''
        if config_file is None:
            env_file = os.environ.get('BIOMAJ_CONF')
            if env_file is not None and os.path.exists(env_file):
                config_file = env_file
            elif os.path.exists('global.properties'):
                config_file = 'global.properties'

        if config_file is None or not os.path.exists(config_file):
            raise Exception('Missing global configuration file')

        BiomajConfig.config_file = os.path.abspath(config_file)

        BiomajConfig.global_config = configparser.ConfigParser()

        user_config_path = os.path.expanduser('~/.biomaj.cfg')
        if allow_user_config and os.path.exists(user_config_path):
            BiomajConfig.user_config_file = user_config_path
            BiomajConfig.user_config = configparser.ConfigParser()
            BiomajConfig.user_config.read([user_config_path])
        else:
            BiomajConfig.user_config_file = None

        BiomajConfig.global_config.read([config_file])

        def _general_option(option):
            # ConfigParser.get raises when the option is missing instead of
            # returning None, so probe with has_option to make the option
            # really optional.
            if BiomajConfig.global_config.has_option('GENERAL', option):
                return BiomajConfig.global_config.get('GENERAL', option)
            return None

        # ElasticSearch indexation support
        do_index = _general_option('use_elastic') == "1"
        if do_index:
            elastic_nodes = _general_option('elastic_nodes')
            if elastic_nodes:
                elastic_hosts = elastic_nodes.split(',')
            else:
                elastic_hosts = ['localhost']
            elastic_index = _general_option('elastic_index')
            if elastic_index is None:
                elastic_index = 'biomaj'

            if _general_option('test') == "1":
                # Test connection to elasticsearch, if not available skip indexing for tests
                BmajIndex.skip_if_failure = True

            BmajIndex.load(index=elastic_index,
                           hosts=elastic_hosts,
                           do_index=do_index)
Example #3
0
    def remove_session(self, sid):
        '''
        Delete a session from db

        The session and the production entries created by it are pulled from
        the bank document. When the session is found and has a release, the
        matching ``pending.<release>`` field is unset too and the release is
        removed from the index. ``self.bank`` is reloaded afterwards.

        :param sid: id of the session
        :type sid: long
        :return: bool, always True
        '''
        stored_bank = self.banks.find_one({'name': self.name})
        # Every session is scanned, the last one matching the id wins
        releases = [entry['release'] for entry in stored_bank['sessions'] if entry['id'] == sid]
        release = releases[-1] if releases else None

        changes = {
            '$pull': {
                'sessions': {'id': sid},
                'production': {'session': sid}
            }
        }
        if release is not None:
            changes['$unset'] = {'pending.' + release: ''}
        self.banks.update({'name': self.name}, changes)

        # Update object
        self.bank = self.banks.find_one({'name': self.name})
        if release is not None:
            BmajIndex.remove(self.name, release)
        return True
Example #4
0
    def removeAll(self, force=False):
        '''
        Remove all bank releases and database records

        The removal is refused (and logged as an error) when *force* is not
        set and one of the entries in ``self.bank['production']`` has a truthy
        ``freeze`` flag. Otherwise the bank document, its index entries and
        its data, offline and log directories are deleted.

        :param force: force removal even if some production dirs are frozen
        :type force: bool
        :return: bool, False if the removal was refused, True otherwise
        '''
        if not force:
            has_freeze = any(
                prod.get('freeze') for prod in self.bank['production'])
            if has_freeze:
                logging.error(
                    'Cannot remove bank, some production directories are freezed, use force if needed'
                )
                return False

        self.banks.remove({'name': self.name})
        BmajIndex.delete_all_bank(self.name)

        bank_data_dir = self.get_data_dir()
        # logging.warning replaces the deprecated logging.warn alias
        logging.warning('DELETE ' + bank_data_dir)
        if os.path.exists(bank_data_dir):
            shutil.rmtree(bank_data_dir)

        bank_offline_dir = os.path.join(self.config.get('data.dir'),
                                        self.config.get('offline.dir.name'))
        if os.path.exists(bank_offline_dir):
            shutil.rmtree(bank_offline_dir)

        # Logs are only deleted when self.no_log is set
        bank_log_dir = os.path.join(self.config.get('log.dir'), self.name)
        if os.path.exists(bank_log_dir) and self.no_log:
            shutil.rmtree(bank_log_dir)
        return True
Example #5
0
 def tearDown(self):
     '''
     Remove a leftover ``local.lock`` file from the configured data
     directory, clean the test utilities and delete the index entries of
     the ``test`` bank.
     '''
     stale_lock = os.path.join(self.config.get('data.dir'), 'local.lock')
     if os.path.exists(stale_lock):
         os.remove(stale_lock)
     self.utils.clean()
     BmajIndex.delete_all_bank('test')
Example #6
0
    def test_index(self):
        # Add one production record with two formats (fasta and blast) for
        # bank 'test', then expect a search on that bank to return two results.
        BmajIndex.do_index = True

        fasta_files = {
            "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
            "types": ["nucleic"],
            "tags": {"organism": "hg19"}
        }
        blast_files = {
            "files": ["blast/chr1/chr1db"],
            "types": ["nucleic"],
            "tags": {"chr": "chr1", "organism": "hg19"}
        }
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {"fasta": [fasta_files], "blast": [blast_files]},
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"]
        }
        BmajIndex.add('test', prod, True)

        bank_query = {'query': {'match': {'bank': 'test'}}}
        hits = BmajIndex.search(bank_query)
        self.assertTrue(len(hits) == 2)
Example #7
0
 def tearDown(self):
     '''
     Test cleanup: delete the ``local.lock`` file of the data directory if
     one is left, clean the test utilities and drop the ``test`` bank from
     the index.
     '''
     lock_path = os.path.join(self.config.get('data.dir'), 'local.lock')
     if os.path.exists(lock_path):
         os.remove(lock_path)
     self.utils.clean()
     BmajIndex.delete_all_bank('test')
Example #8
0
    def load_config(config_file=None, allow_user_config=True):
        '''
        Loads general config

        Lookup order for the global file: *config_file*, then the path in the
        BIOMAJ_CONF environment variable, then ``global.properties`` in the
        current directory. The result is stored on ``BiomajConfig``
        (``config_file``, ``global_config`` and, when allowed and present,
        ``user_config`` read from ``~/.biomaj.cfg``).

        When ``use_elastic`` is "1" in the GENERAL section, ``BmajIndex.load``
        is called; ``elastic_nodes`` defaults to ``localhost`` and
        ``elastic_index`` to ``biomaj`` when they are not configured.

        :param config_file: global.properties file path
        :type config_file: str
        :param allow_user_config: use ~/.biomaj.cfg if present
        :type allow_user_config: bool
        :raises Exception: if no global configuration file can be found
        '''
        if config_file is None:
            env_file = os.environ.get('BIOMAJ_CONF')
            if env_file is not None and os.path.exists(env_file):
                config_file = env_file
            else:
                env_file = 'global.properties'
                if os.path.exists(env_file):
                    config_file = env_file

        if config_file is None or not os.path.exists(config_file):
            raise Exception('Missing global configuration file')

        BiomajConfig.config_file = os.path.abspath(config_file)

        BiomajConfig.global_config = configparser.ConfigParser()

        user_file = os.path.expanduser('~/.biomaj.cfg')
        if allow_user_config and os.path.exists(user_file):
            BiomajConfig.user_config_file = user_file
            BiomajConfig.user_config = configparser.ConfigParser()
            BiomajConfig.user_config.read([user_file])
        else:
            BiomajConfig.user_config_file = None

        BiomajConfig.global_config.read([config_file])
        conf = BiomajConfig.global_config

        # ElasticSearch indexation support
        # get() raises on a missing option, so every optional key is probed
        # with has_option first; otherwise the defaults below are unreachable.
        do_index = (conf.has_option('GENERAL', 'use_elastic') and
                    conf.get('GENERAL', 'use_elastic') == "1")
        if do_index:
            elastic_hosts = ['localhost']
            if conf.has_option('GENERAL', 'elastic_nodes') and \
                    conf.get('GENERAL', 'elastic_nodes'):
                elastic_hosts = conf.get('GENERAL', 'elastic_nodes').split(',')

            elastic_index = 'biomaj'
            if conf.has_option('GENERAL', 'elastic_index'):
                elastic_index = conf.get('GENERAL', 'elastic_index')

            if conf.has_option('GENERAL', 'test') and \
                    conf.get('GENERAL', 'test') == "1":
                # Test connection to elasticsearch, if not available skip indexing for tests
                BmajIndex.skip_if_failure = True

            BmajIndex.load(index=elastic_index, hosts=elastic_hosts,
                           do_index=do_index)
Example #9
0
 def test_remove_all(self):
     # Index the sample production through test_index, delete everything
     # indexed for bank 'test' and expect the bank search to come back empty.
     self.test_index()
     bank_query = {'query': {'match': {'bank': 'test'}}}
     BmajIndex.delete_all_bank('test')
     remaining = BmajIndex.search(bank_query)
     self.assertTrue(len(remaining) == 0)
Example #10
0
    def test_index(self):
        # Add a production record holding a fasta and a blast format for
        # bank 'test' and expect two results when searching on that bank.
        BmajIndex.do_index = True

        fasta_format = [{
            "files": ["fasta/chr1.fa", "fasta/chr2.fa"],
            "types": ["nucleic"],
            "tags": {"organism": "hg19"},
        }]
        blast_format = [{
            "files": ["blast/chr1/chr1db"],
            "types": ["nucleic"],
            "tags": {"chr": "chr1", "organism": "hg19"},
        }]
        prod = {
            "data_dir": "/tmp/test/data",
            "formats": {"fasta": fasta_format, "blast": blast_format},
            "freeze": False,
            "session": 1416229253.930908,
            "prod_dir": "alu-2003-11-26",
            "release": "2003-11-26",
            "types": ["nucleic"],
        }

        BmajIndex.add('test', prod, True)

        match_bank = {'match': {'bank': 'test'}}
        found = BmajIndex.search({'query': match_bank})
        self.assertTrue(len(found) == 2)
Example #11
0
    def setUp(self):
        '''
        Load the test configuration and start from a clean state.

        The test is skipped when indexing is not enabled after the
        configuration has been loaded. Otherwise all bank documents are
        removed, the index entries of the ``local`` bank are deleted and a
        leftover ``local.lock`` file in the data directory is removed.
        '''
        BmajIndex.es = None
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties, allow_user_config=False)
        if not BmajIndex.do_index:
            self.skipTest("Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        BmajIndex.delete_all_bank('local')

        self.config = BiomajConfig('local')
        data_dir = self.config.get('data.dir')
        lock_file = os.path.join(data_dir, 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
Example #12
0
    def setUp(self):
        '''
        Prepare an indexing test.

        Resets the index client, loads the test global properties without the
        user configuration and skips the test when indexing is not enabled.
        Then every bank document is removed, the ``local`` bank is dropped
        from the index and a stale ``local.lock`` file is deleted from the
        data directory.
        '''
        BmajIndex.es = None
        self.utils = UtilsForTest()
        BiomajConfig.load_config(self.utils.global_properties,
                                 allow_user_config=False)
        if not BmajIndex.do_index:
            self.skipTest(
                "Skipping indexing tests due to elasticsearch not available")
        # Delete all banks
        b = Bank('local')
        b.banks.remove({})
        BmajIndex.delete_all_bank('local')

        self.config = BiomajConfig('local')
        lock_file = os.path.join(self.config.get('data.dir'), 'local.lock')
        if os.path.exists(lock_file):
            os.remove(lock_file)
Example #13
0
    def remove_session(self, sid):
        '''
        Delete a session from db

        The ``files_<sid>`` and ``local_files_<sid>`` files of the cache
        directory are deleted when present. The production entries of the
        session are pulled from the bank document and, when the session has a
        release, its ``pending.<release>`` field is unset. Depending on the
        ``keep.old.sessions`` option the session itself is either stamped
        with a ``deleted`` time or pulled as well. Finally ``self.bank`` is
        reloaded and the release is removed from the index.

        :param sid: id of the session
        :type sid: long
        :return: bool, always True
        '''
        stored_bank = self.banks.find_one({'name': self.name})
        # Every session is scanned, the last one matching the id wins
        releases = [entry['release'] for entry in stored_bank['sessions'] if entry['id'] == sid]
        release = releases[-1] if releases else None

        cache_dir = self.config.get('cache.dir')
        for prefix in ('files_', 'local_files_'):
            cached_list = os.path.join(cache_dir, prefix + str(sid))
            if os.path.exists(cached_list):
                os.remove(cached_list)

        keep_sessions = self.config.get_bool('keep.old.sessions')
        if keep_sessions:
            logging.debug('keep old sessions')
            pulled = {'production': {'session': sid}}
        else:
            pulled = {'sessions': {'id': sid}, 'production': {'session': sid}}

        changes = {'$pull': pulled}
        if release is not None:
            changes['$unset'] = {'pending.' + release: ''}
        self.banks.update({'name': self.name}, changes)

        if keep_sessions:
            # The session stays in the document, it is only marked as deleted
            self.banks.update({'name': self.name, 'sessions.id': sid},
                              {'$set': {'sessions.$.deleted': time.time()}})

        # Update object
        self.bank = self.banks.find_one({'name': self.name})
        if release is not None:
            BmajIndex.remove(self.name, release)
        return True
Example #14
0
    def save_session(self):
        '''
        Save session in database

        Stamps the session with the current time and the log file, dumps the
        ``download_files`` and ``files`` lists of the session as JSON into
        ``files_<id>`` and ``local_files_<id>`` in the cache directory (the
        lists kept in the session are then emptied), pushes the session into
        the bank document and passes it to ``BmajIndex.add``.

        For an ``update`` session, the release is recorded as pending while
        the workflow is not over; once it is over and the session ``update``
        flag is set, a production entry is built and stored and the pending
        marker is removed. ``self.bank`` is reloaded from the database at the
        end.
        '''
        self.session._session['last_update_time'] = time.time()
        self.session._session['log_file'] = self.config.log_file
        if self.use_last_session:
            # Remove last session
            self.banks.update({'name': self.name}, {'$pull': {'sessions': {'id': self.session._session['id']}}})

        # Insert session
        # Bank field pointing at the latest session of this kind. It stays
        # None for any other action so the update below never hits an unbound
        # name; in that case no pointer is recorded.
        action = None
        session_action = self.session.get('action')
        if session_action == 'update':
            action = 'last_update_session'
        elif session_action == 'remove':
            action = 'last_remove_session'

        cache_dir = self.config.get('cache.dir')
        download_files = self.session.get('download_files')
        if download_files is not None:
            download_path = os.path.join(cache_dir, 'files_' + str(self.session.get('id')))
            with open(download_path, 'w') as f_downloaded_files:
                f_downloaded_files.write(json.dumps(download_files))
            self.session.set('download_files', [])

        local_files = self.session.get('files')
        if local_files is not None:
            local_path = os.path.join(cache_dir, 'local_files_' + str(self.session.get('id')))
            with open(local_path, 'w') as f_local_files:
                # Dump the local file list itself, not the download list
                f_local_files.write(json.dumps(local_files))
            self.session.set('files', [])

        set_fields = {'properties': self.get_properties()}
        if action is not None:
            set_fields[action] = self.session._session['id']
        self.banks.update({'name': self.name}, {
            '$set': set_fields,
            '$push': {'sessions': self.session._session}
        })
        BmajIndex.add(self.name, self.session._session)

        if session_action == 'update':
            flow_over = self.session.get_status(Workflow.FLOW_OVER)
            if not flow_over and self.session.get('release'):
                self.banks.update({'name': self.name},
                                  {'$set': {'pending.' + self.session.get('release'): self.session._session['id']}})
            if flow_over and self.session.get('update'):
                # We expect that a production release has reached the FLOW_OVER status.
                # If no update is needed (same release etc...), the *update* session of the session is set to False
                logging.debug('Bank:Save:' + self.name)
                if len(self.bank['production']) > 0:
                    # Remove from database; the local object is reloaded at the end
                    self.banks.update({'name': self.name},
                                      {'$pull': {'production': {'release': self.session._session['release']}}})

                release_types = []
                if self.config.get('db.type'):
                    release_types = self.config.get('db.type').split(',')
                release_formats = list(self.session._session['formats'].keys())
                if self.config.get('db.formats'):
                    # Formats declared in the configuration complete the session ones
                    for config_format in self.config.get('db.formats').split(','):
                        if config_format not in release_formats:
                            release_formats.append(config_format)

                # Collect the types declared by the files of every format
                for release_format in self.session._session['formats']:
                    for release_files in self.session._session['formats'][release_format]:
                        if release_files['types']:
                            for rtype in release_files['types']:
                                if rtype not in release_types:
                                    release_types.append(rtype)

                prod_dir = self.session.get_release_directory()
                if self.session.get('prod_dir'):
                    prod_dir = self.session.get('prod_dir')
                production = {'release': self.session.get('release'),
                              'remoterelease': self.session.get('remoterelease'),
                              'session': self.session._session['id'],
                              'formats': release_formats,
                              'types': release_types,
                              'size': self.session.get('fullsize'),
                              'data_dir': self.session._session['data_dir'],
                              'dir_version': self.session._session['dir_version'],
                              'prod_dir': prod_dir,
                              'freeze': False}
                self.bank['production'].append(production)

                self.banks.update({'name': self.name},
                                  {'$push': {'production': production},
                                   '$unset': {'pending.' + self.session.get('release'): ''}
                                   })

        self.bank = self.banks.find_one({'name': self.name})
Example #15
0
 def searchindex(query):
     '''
     Pass *query* to ``BmajIndex.searchq`` and return its result unchanged.
     '''
     result = BmajIndex.searchq(query)
     return result
Example #16
0
 def test_remove_all(self):
     # Populate the index through test_index, delete everything indexed for
     # bank 'test' and expect the bank search to return nothing.
     self.test_index()
     match_bank = {'match': {'bank': 'test'}}
     query = {'query': match_bank}
     BmajIndex.delete_all_bank('test')
     leftovers = BmajIndex.search(query)
     self.assertTrue(len(leftovers) == 0)
Example #17
0
    def save_session(self):
        '''
        Save session in database

        Stamps the session with the current time and the log file, dumps the
        ``download_files`` and ``files`` lists of the session as JSON into
        ``files_<id>`` and ``local_files_<id>`` in the cache directory (the
        lists kept in the session are then emptied), pushes the session into
        the bank document and passes it to ``BmajIndex.add``.

        For an ``update`` session, the release is recorded as pending while
        the workflow is not over; once it is over and the session ``update``
        flag is set, a production entry is built and stored and the pending
        marker is removed. ``self.bank`` is reloaded from the database at the
        end.
        '''
        self.session._session['last_update_time'] = time.time()
        self.session._session['log_file'] = self.config.log_file
        if self.use_last_session:
            # Remove last session
            self.banks.update(
                {'name': self.name},
                {'$pull': {
                    'sessions': {
                        'id': self.session._session['id']
                    }
                }})

        # Insert session
        # Bank field pointing at the latest session of this kind. It stays
        # None for any other action so the update below never hits an unbound
        # name; in that case no pointer is recorded.
        action = None
        session_action = self.session.get('action')
        if session_action == 'update':
            action = 'last_update_session'
        elif session_action == 'remove':
            action = 'last_remove_session'

        cache_dir = self.config.get('cache.dir')
        download_files = self.session.get('download_files')
        if download_files is not None:
            download_path = os.path.join(
                cache_dir, 'files_' + str(self.session.get('id')))
            with open(download_path, 'w') as f_downloaded_files:
                f_downloaded_files.write(json.dumps(download_files))
            self.session.set('download_files', [])

        local_files = self.session.get('files')
        if local_files is not None:
            local_path = os.path.join(
                cache_dir, 'local_files_' + str(self.session.get('id')))
            with open(local_path, 'w') as f_local_files:
                # Dump the local file list itself, not the download list
                f_local_files.write(json.dumps(local_files))
            self.session.set('files', [])

        set_fields = {'properties': self.get_properties()}
        if action is not None:
            set_fields[action] = self.session._session['id']
        self.banks.update({'name': self.name}, {
            '$set': set_fields,
            '$push': {
                'sessions': self.session._session
            }
        })
        BmajIndex.add(self.name, self.session._session)

        if session_action == 'update':
            flow_over = self.session.get_status(Workflow.FLOW_OVER)
            if not flow_over and self.session.get('release'):
                self.banks.update({'name': self.name}, {
                    '$set': {
                        'pending.' + self.session.get('release'):
                        self.session._session['id']
                    }
                })
            if flow_over and self.session.get('update'):
                # We expect that a production release has reached the FLOW_OVER status.
                # If no update is needed (same release etc...), the *update* session of the session is set to False
                logging.debug('Bank:Save:' + self.name)
                if len(self.bank['production']) > 0:
                    # Remove from database; the local object is reloaded at the end
                    self.banks.update({'name': self.name}, {
                        '$pull': {
                            'production': {
                                'release': self.session._session['release']
                            }
                        }
                    })

                release_types = []
                if self.config.get('db.type'):
                    release_types = self.config.get('db.type').split(',')
                release_formats = list(self.session._session['formats'].keys())
                if self.config.get('db.formats'):
                    # Formats declared in the configuration complete the session ones
                    for config_format in self.config.get('db.formats').split(','):
                        if config_format not in release_formats:
                            release_formats.append(config_format)

                # Collect the types declared by the files of every format
                for release_format in self.session._session['formats']:
                    for release_files in self.session._session['formats'][
                            release_format]:
                        if release_files['types']:
                            for rtype in release_files['types']:
                                if rtype not in release_types:
                                    release_types.append(rtype)

                prod_dir = self.session.get_release_directory()
                if self.session.get('prod_dir'):
                    prod_dir = self.session.get('prod_dir')
                production = {
                    'release': self.session.get('release'),
                    'remoterelease': self.session.get('remoterelease'),
                    'session': self.session._session['id'],
                    'formats': release_formats,
                    'types': release_types,
                    'size': self.session.get('fullsize'),
                    'data_dir': self.session._session['data_dir'],
                    'dir_version': self.session._session['dir_version'],
                    'prod_dir': prod_dir,
                    'freeze': False
                }
                self.bank['production'].append(production)

                self.banks.update({'name': self.name}, {
                    '$push': {
                        'production': production
                    },
                    '$unset': {
                        'pending.' + self.session.get('release'): ''
                    }
                })

        self.bank = self.banks.find_one({'name': self.name})
Example #18
0
    def remove_session(self, sid):
        '''
        Delete a session from db

        Deletes the ``files_<sid>`` and ``local_files_<sid>`` cache files
        when they exist, pulls the production entries of the session from the
        bank document and unsets ``pending.<release>`` when the session has a
        release. With the ``keep.old.sessions`` option the session is kept
        and stamped with a ``deleted`` time, otherwise it is pulled too.
        ``self.bank`` is then reloaded and the release removed from the index.

        :param sid: id of the session
        :type sid: long
        :return: bool, always True
        '''
        session_release = None
        current = self.banks.find_one({'name': self.name})
        for stored_session in current['sessions']:
            # No early exit: the last session matching the id wins
            if stored_session['id'] == sid:
                session_release = stored_session['release']

        cache_dir = self.config.get('cache.dir')
        for cache_prefix in ('files_', 'local_files_'):
            cache_file = os.path.join(cache_dir, cache_prefix + str(sid))
            if os.path.exists(cache_file):
                os.remove(cache_file)

        keep_old = self.config.get_bool('keep.old.sessions')
        if keep_old:
            logging.debug('keep old sessions')

        pull_spec = {}
        if not keep_old:
            pull_spec['sessions'] = {'id': sid}
        pull_spec['production'] = {'session': sid}

        update_doc = {'$pull': pull_spec}
        if session_release is not None:
            update_doc['$unset'] = {'pending.' + session_release: ''}
        self.banks.update({'name': self.name}, update_doc)

        if keep_old:
            # The session stays in the document, it is only marked as deleted
            self.banks.update({
                'name': self.name,
                'sessions.id': sid
            }, {'$set': {
                'sessions.$.deleted': time.time()
            }})

        # Update object
        self.bank = self.banks.find_one({'name': self.name})
        if session_release is not None:
            BmajIndex.remove(self.name, session_release)
        return True
Example #19
0
 def searchindex(query):
     '''
     Hand *query* over to ``BmajIndex.searchq`` and return what it returns.
     '''
     found = BmajIndex.searchq(query)
     return found