def test_update_CHANGELOG(chObject_new, load_changelog_file, push_changelog_file,
                          db_obj, conn_api, dearchive_file, del_from_db,
                          clean_tmp):
    """Exercise 'update_CHANGELOG': refresh the CHANGELOG and archive it."""
    logger = logging.getLogger('test_update_CHANGELOG')
    logger.debug('Testing the \'update_CHANGELOG\' function')

    # point the backup dir and the CHANGELOG fire path at the test data area
    CONFIG.set('ctree', 'backup', os.getenv('DATADIR') + "/ctree/")
    CONFIG.set('ctree', 'chlog_fpath', '/ctree/MOCK_CHANGELOG')

    chObject_new.print_changelog(ifile=load_changelog_file.name)
    chObject_new.update_CHANGELOG(load_changelog_file.name,
                                  db=db_obj,
                                  api=conn_api)

    # register the artefacts so the teardown fixtures clean them up
    del_from_db.append(load_changelog_file.name)
    dearchive_file.append(push_changelog_file)
def __init__(self, type, id, **kwargs):
    """Instantiate an ENArecord, keeping only the kwargs allowed by CONFIG."""
    ena_rec.debug('Creating an ENArecord object')
    self.type = type
    self.accession = id
    # the 'fields' option of the [ena] section lists the accepted attribute
    # names, comma-separated (newlines are stripped before splitting)
    permitted_str = CONFIG.get('ena', 'fields').replace('\n', '')
    permitted = set(permitted_str.split(","))
    for key, value in kwargs.items():
        if key in permitted:
            self.__dict__[key] = value
def test_push_ctree(db_obj, conn_api, load_staging_tree, push_prod_tree,
                    dearchive_file, clean_tmp, del_from_db):
    """Exercise 'push_ctree': archive the staging current.tree file."""
    logger = logging.getLogger('test_push_ctree')
    logger.debug('Testing \'push_ctree\' function')

    datadir = os.getenv('DATADIR')
    CONFIG.set('ctree', 'backup', datadir + "/ctree/")

    ctree = CurrentTree(db=db_obj,
                        api=conn_api,
                        staging_tree=datadir + "/ctree/current.staging.tree",
                        prod_tree=datadir + "/ctree/current.minus1.tree")
    fire_path = ctree.push_ctree(dry=False)

    # schedule cleanup of the DB record and the archived object
    del_from_db.append(load_staging_tree.name)
    dearchive_file.append(fire_path)
def modify_settings(request):
    """
    Fixture to modify the settings.ini and generate a modified
    version that will be used in this file
    """
    # anchor the FTP mount on the absolute test-data directory
    data_dir = os.path.abspath(os.getenv('DATADIR'))
    CONFIG.set('ftp', 'ftp_mount', data_dir)
    with open('settings_m.ini', 'w') as out_fh:
        CONFIG.write(out_fh)

    def fin():
        # teardown: remove the generated settings file
        print('\n[teardown] modify_settings finalizer, deleting modified settings file')
        os.remove('settings_m.ini')

    request.addfinalizer(fin)
    return 'settings_m.ini'
def test_run_new(db_obj, conn_api, load_changelog_file, push_changelog_file,
                 push_prod_tree, del_from_db, dearchive_file, clean_tmp):
    """Exercise CurrentTree.run() when staging has one extra path vs prod."""
    logger = logging.getLogger('test_run_new')
    logger.debug('Testing \'run\' function when there is an additional path in '
                 'CurrentTree.staging_tree with respect to CurrentTree.prod_tree')

    datadir = os.getenv('DATADIR')
    ctree = CurrentTree(db=db_obj,
                        api=conn_api,
                        staging_tree=datadir + "/ctree/current.staging.tree",
                        prod_tree=datadir + "/ctree/current.minus1.tree")

    CONFIG.set('ctree', 'backup', datadir + "/ctree/")
    CONFIG.set('ctree', 'chlog_fpath', '/ctree/MOCK_CHANGELOG')
    CONFIG.set('ctree', 'chlog_details_dir', '/ftp/changelog_details_test')

    pushed_dict = ctree.run(chlog_f=load_changelog_file.name,
                            limit=10,
                            dry=False)

    # every pushed object must be dearchived on teardown;
    # 'chlog_details' holds a list of paths, the other keys a single path
    for key, value in pushed_dict.items():
        if key == "chlog_details":
            dearchive_file.extend(value)
        else:
            dearchive_file.append(value)

    # changelog_details files also have DB records that must be removed
    for detail_path in pushed_dict['chlog_details']:
        del_from_db.append(f"{CONFIG.get('ftp', 'ftp_mount')}/{detail_path}")
    del_from_db.append(load_changelog_file.name)
def delete_arch_file(modify_settings, db_obj, conn_api):
    """
    Fixture to delete the moved test file/s from DB and to dearchive
    it from FIRE
    """
    collected = []
    yield collected

    print('\n[teardown] delete_arch_file finalizer, deleting file from db')
    CONFIG.read(modify_settings)
    # the mount prefix is invariant across the loop, compute it once
    mount_prefix = CONFIG.get('ftp', 'ftp_mount') + "/"
    for path in collected:
        # delete the DB record for this file
        file_obj = db_obj.fetch_file(basename=os.path.basename(path))
        db_obj.delete_file(file_obj, dry=False)
        # dearchive the matching object from FIRE
        fire_path = re.sub(mount_prefix, '', path)
        fire_obj = conn_api.fetch_object(firePath=fire_path)
        conn_api.delete_object(fireOid=fire_obj.fireOid, dry=False)
        print(f"[teardown] delete_arch_file finalizer, deleting {path}")
def __init__(self, pwd):
    """
    Constructor

    Parameters
    ----------
    pwd : str
        Password for API.
    """
    api_logger.debug('Creating an API object')
    self.pwd = pwd
    # the user name comes from the [fire] section of the settings file
    self.user = CONFIG.get('fire', 'user')
def set_conn(self):
    """
    Function that will set the conn class variable

    Returns
    -------
    conn: Connection object
    """
    db_logger.debug('Setting connection...')
    # connection parameters come from the [mysql_conn] section of CONFIG,
    # except the password and database name which live on the instance
    conn_params = {
        'host': CONFIG.get('mysql_conn', 'host'),
        'user': CONFIG.get('mysql_conn', 'user'),
        'password': self.pwd,
        'db': self.dbname,
        'port': CONFIG.getint('mysql_conn', 'port'),
    }
    conn = pymysql.connect(**conn_params)
    db_logger.debug('Connection successful!')
    return conn
def print_changelog(self, ifile):
    """
    Function that adds an entry to the CHANGELOG report file

    The entry is prepended to the existing file content and, per
    non-empty set/dict attribute of this object, reports the file types
    touched and where the matching changelog_details file can be found.

    Parameters
    ----------
    ifile : str
        path to CHANGELOG file that will be updated.

    Returns
    -------
    None
    """
    now_str = self.dtime.strftime('%Y-%m-%d')
    now_str1 = self.dtime.strftime('%Y%m%d')
    # changelog_details dir is loop-invariant; fetch it once from config
    dirname = CONFIG.get('ctree', 'chlog_details_dir')
    lines_to_add = now_str + "\n\n"
    for state, value in self.__dict__.items():
        # only report attributes holding a non-empty set/dict of paths
        # (len() covers both containers; a dict iterates over its keys)
        if not isinstance(value, (set, dict)) or len(value) == 0:
            continue
        # guess the file type of each path; lowercase before deduplicating
        # so types differing only in case collapse, and sort so the report
        # content is deterministic
        types = sorted({File(name=p).guess_type().lower() for p in value})
        lines_to_add += "Modification to: {0}\n\n".format(",".join(types))
        lines_to_add += "Details can be found in\n" \
                        "{0}/changelog_details_{1}_{2}\n\n".format(dirname,
                                                                   now_str1,
                                                                   state)
    # prepend the new entry to the existing CHANGELOG content
    with open(ifile, 'r+') as f:
        content = f.read()
        f.seek(0, 0)
        f.write(lines_to_add.rstrip('\r\n') + '\n\n' + content)
def guess_type(self, xml_dict):
    """
    Function to guess the type of a certain XML ENA response

    Parameters
    ----------
    xml_dict : dict
        obtained from the ENA response after parsing with xmltodict
        function

    Returns
    -------
    str : type of ENA XML (RUN, EXPERIMENT, STUDY, SAMPLE, ...)

    Raises
    ------
    Exception
        If the record type is not listed in the 'types' option of the
        [ena] CONFIG section.
    """
    # valid record types come from config, e.g. RUN, EXPERIMENT, STUDY, ...
    valid = [x.strip() for x in CONFIG.get('ena', 'types').split(',')]
    # the root key of the parsed XML is '<TYPE>_SET'; the local is named
    # rec_type so the builtin 'type' is not shadowed
    rec_type = next(iter(xml_dict)).replace('_SET', '')
    if rec_type not in valid:
        raise Exception(f"{rec_type} is not valid ENA record type")
    return rec_type
def guess_type(self):
    """
    Function to get the type of a file depending on the
    'file_type_rules' section of CONFIG

    Returns
    -------
    str : type of file
    """
    assert CONFIG.has_section(
        'file_type_rules'
    ) is True, "Provide a 'file_type_rules' section in your *.ini file"
    rules_dict = CONFIG._sections['file_type_rules']
    # last dot-separated token of the basename; str.split always returns
    # at least one element, so no None/empty check is needed here
    ext = os.path.basename(self.name).split('.')[-1]
    if ext in rules_dict:
        return rules_dict[ext]
    # no rule for this extension: assign the default file type
    return rules_dict['default']
def update_CHANGELOG(self, chlog_p, db, api, dry=True):
    """
    Function to push the updated CHANGELOG file to FIRE. This function will
    do the following:

    1) Update the CHANGELOG file metadata in the DB
    2) Create a backup copy of the CHANGELOG file before being updated
    3) Delete the old CHANGELOG file from FIRE
    4) Push the new (updated) CHANGELOG file to FIRE
    5) Delete the backed-up file if everything went well

    Parameters
    ----------
    chlog_p : str
        path to updated CHANGELOG file that will be pushed to FIRE.
    db : DB connection object.
    api : API connection object.
    dry: bool, default=True
        Perform a dry run.

    Returns
    -------
    str : Fire path of the updated CHANGELOG files

    Raises
    ------
    Exception
    """
    # timestamp used to name the backup copy of the old CHANGELOG.
    # NOTE(review): self.dtime.now() invokes the datetime classmethod
    # 'now', so it returns the *current* time regardless of the value
    # stored in self.dtime — confirm this is intended.
    dtstr = self.dtime.now().strftime('%Y_%m_%dT%H%M%S')
    # update the CHANGELOG metadata in the DB
    chlog_obj = File(name=chlog_p)
    chlog_obj.md5 = chlog_obj.calc_md5()
    chlog_obj.size = os.path.getsize(chlog_obj.name)
    # get the current path to CHANGELOG so it is updated in DB
    chglog_p = f"{CONFIG.get('ftp', 'ftp_mount')}{CONFIG.get('ctree', 'chlog_fpath')}"
    db.update_file('md5', chlog_obj.md5, chglog_p, dry=dry)
    db.update_file('size', chlog_obj.size, chglog_p, dry=dry)
    ce_logger.info("Pushing updated CHANGELOG file to API")
    # to push the updated CHANGELOG you need to delete it from FIRE first.
    # retrieve_object also writes a timestamped backup copy of the old
    # file into the [ctree] 'backup' directory
    old_file = api.retrieve_object(
        firePath=CONFIG.get('ctree', 'chlog_fpath'),
        outfile=
        f"{CONFIG.get('ctree','backup')}/{os.path.basename(chlog_p)}."
        f"{dtstr}.backup")
    if old_file is None:
        raise Exception(f"No CHANGELOG file retrieved from the archive")
    # fetch the FIRE object metadata to learn the fireOid to delete
    fire_obj = api.fetch_object(
        firePath=CONFIG.get('ctree', 'chlog_fpath'))
    if fire_obj is None:
        raise Exception(f"No CHANGELOG file retrieved from the archive")
    ce_logger.info("Delete CHANGELOG to be updated from the archive")
    api.delete_object(fireOid=fire_obj.fireOid, dry=dry)
    ce_logger.info("Push updated CHANGELOG file to the archive")
    # archive the updated CHANGELOG under the same fire path
    api.push_object(chlog_obj, dry=dry,
                    fire_path=CONFIG.get('ctree', 'chlog_fpath'))
    return f"{CONFIG.get('ctree','chlog_fpath')}"
def get_ctree(self, fields, outfile, limit=None):
    """
    Function to dump DB file records and generate a current tree
    file pointed by outfile. This function will also parse each of
    the records in the 'file' table and creates a dict with the
    following information:
    { 'path' : md5 }

    Parameter
    --------
    fields: list of str
        List with the fields from the 'file' table to be dumped.
        The order of the fields in the dumped file will be preserved.
        The list is not modified by this function.
    outfile: str
        File path for the current.same.tree output.
    limit: int, default = None
        Limit current.same.tree file to this int number of records
        If None then (all records will be dumped).

    Return
    ------
    outfile : str
        path with current.tree.
    data_dict : dict
        Dict with md5s { 'path' : md5 }
    None is returned when the query retrieves no records.
    """
    assert isinstance(fields, list)
    fields_str = ",".join(fields)
    if limit is None:
        query = f"SELECT {fields_str} FROM file"
    else:
        query = f"SELECT {fields_str} FROM file limit {limit}"
    # columns written to the dump include a synthetic 'type' column after
    # the first field; work on a copy so the caller's list is not mutated
    # (the original code inserted into 'fields' in place)
    out_fields = list(fields)
    out_fields.insert(1, "type")
    cursor = self.conn.cursor(pymysql.cursors.DictCursor)
    data_dict = {}  # dict {'path' : 'md5' }
    # 'with' guarantees the output file is closed on every exit path
    # (the original 'f.close' was missing parentheses and never ran)
    with open(outfile, 'w') as f:
        cursor.execute(query)
        try:
            result_set = cursor.fetchall()
            if not result_set:
                db_logger.debug(
                    f"No file retrieved from DB using query:{query}")
                return None
            for row in result_set:
                # store paths relative to the FTP mount point
                row["name"] = row["name"].replace(
                    CONFIG.get("ftp", "ftp_mount") + "/", "")
                if CONFIG.get("ftp", "staging_mount") in row["name"]:
                    continue
                # skip files that are in any dir that is not the ftp/ dir,
                # as these files are not included in the current.tree file
                bits = row["name"].split("/")
                if bits[0] != "ftp":
                    continue
                row["type"] = "file"
                data_dict[row["name"]] = row["md5"]
                for k in out_fields:
                    f.write(f"{row[k]}\t")
                f.write("\n")
            cursor.close()
            self.conn.commit()
        except pymysql.Error:
            db_logger.error("Exception occurred", exc_info=True)
            # Rollback in case there is any error
            self.conn.rollback()
    return outfile, data_dict
def run(self, chlog_f, dry=True, limit=None):
    """
    Function to perform all operations involved in the comparison
    between the current.tree in the DB and the current.tree in the FTP

    Parameters
    ----------
    chlog_f: str
        Path for CHANGELOG file that will be modified.
    dry: bool, default=True
        If False, then objects will be actually pushed to the archive
        and database will be modified.
    limit: int, optional
        Limit the number of records to retrieve from DB.

    Returns
    -------
    dict or 0
        If there is a ChangeEvents with entries in it then it will
        generate a dict with the following format:
        {'chlog_details' : chlog_details_list,
         'chlog_firepath' : chlog_firepath,
         'ctree_firepath' : ctree_firepath}
        If the ChangeEvents object has size = 0 then it will return 0
    """
    ct_logger.info("Starting CurrentTree.run() process")
    # fields from the 'file' table dumped into the staging tree file
    fields = ['name', 'size', 'updated', 'md5']
    ct_logger.info(f"Dumping files from DB to {self.staging_tree}")
    # get_ctree returns (outfile, {'path': md5}); keep only the dict.
    # NOTE(review): get_ctree can return None when no records are
    # retrieved, which would make this [1] raise TypeError — confirm
    # that cannot happen here.
    db_dict = self.db.get_ctree(fields,
                                outfile=self.staging_tree,
                                limit=limit)[1]
    ct_logger.info(f"Number of records dumped: {len(db_dict.keys())}")
    ct_logger.info(f"Parsing records in {self.prod_tree}")
    file_dict = self.get_file_dict()
    ct_logger.info(f"Number of records parsed: {len(file_dict.keys())}")
    ct_logger.info(
        f"Looking for differences between {self.staging_tree} and {self.prod_tree}"
    )
    # diff the DB dump against the production tree -> ChangeEvents object
    chgEvents = self.cmp_dicts(db_dict=db_dict, file_dict=file_dict)
    ct_logger.info(
        f"Looking for differences between {self.staging_tree} and {self.prod_tree}. DONE!"
    )
    if chgEvents.size() == 0:
        ct_logger.info(
            "No changes detected, nothing will be done. "
            "The current.tree file in the staging area will be removed")
        os.remove(self.staging_tree)
        return 0
    else:
        ct_logger.info(
            "Changes detected in the data structures. "
            "Proceeding...")
        # write one changelog_details_* file per change type into the
        # [ctree] 'temp' directory
        ofiles = chgEvents.print_chlog_details(
            odir=CONFIG.get('ctree', 'temp'))
        ct_logger.info("Pushing changelog_details_* files to archive...")
        chlog_details_list = chgEvents.push_chlog_details(pathlist=ofiles,
                                                          db=self.db,
                                                          api=self.api,
                                                          dry=dry)
        # prepend the new entry to the CHANGELOG file, then archive it
        chgEvents.print_changelog(ifile=chlog_f)
        ct_logger.info(
            "Updating and pushing to archive the updated CHANGELOG file..."
        )
        chlog_firepath = chgEvents.update_CHANGELOG(chlog_f,
                                                    db=self.db,
                                                    api=self.api,
                                                    dry=dry)
        ct_logger.info("Pushing to archive the new current.tree file...")
        ctree_firepath = self.push_ctree(dry=dry)
        ct_logger.info(
            "Pushing to archive the new current.tree file. DONE!")
        return {
            'chlog_details': chlog_details_list,
            'chlog_firepath': chlog_firepath,
            'ctree_firepath': ctree_firepath
        }