コード例 #1
0
def test_update_CHANGELOG(chObject_new, load_changelog_file,
                          push_changelog_file, db_obj, conn_api,
                          dearchive_file, del_from_db, clean_tmp):
    """
    Exercise 'print_changelog' followed by 'update_CHANGELOG' on the
    object provided by the 'chObject_new' fixture.

    The CHANGELOG file handle comes from 'load_changelog_file'; its path
    and the value of 'push_changelog_file' are registered in the
    'del_from_db' / 'dearchive_file' fixtures (presumably consumed by
    their teardown -- verify against the fixture definitions).

    Note that 'update_CHANGELOG' is invoked without a 'dry' argument.
    """
    logger = logging.getLogger('test_update_CHANGELOG')
    logger.debug("Testing the 'update_CHANGELOG' function")

    # point the ctree settings at the test data locations
    backup_dir = os.getenv('DATADIR') + "/ctree/"
    CONFIG.set('ctree', 'backup', backup_dir)
    CONFIG.set('ctree', 'chlog_fpath', '/ctree/MOCK_CHANGELOG')

    chlog_path = load_changelog_file.name
    chObject_new.print_changelog(ifile=chlog_path)
    chObject_new.update_CHANGELOG(chlog_path, db=db_obj, api=conn_api)

    # register what has to be cleaned up afterwards
    del_from_db.append(chlog_path)
    dearchive_file.append(push_changelog_file)
コード例 #2
0
ファイル: ena_record.py プロジェクト: igsr/igsr_archive
    def __init__(self, type, id, **kwargs):
        """
        Constructor

        Parameters
        ----------
        type : stored as-is in the 'type' attribute.
        id : stored in the 'accession' attribute.
        **kwargs : additional attributes. Only the keys listed in the
                   comma-separated 'fields' option of the 'ena' section
                   of CONFIG are kept; any other key is silently ignored.
        """
        ena_rec.debug('Creating an ENArecord object')

        self.type = type
        self.accession = id

        # the option may span several lines: drop the newlines before splitting
        raw_fields = CONFIG.get('ena', 'fields')
        permitted = set(raw_fields.replace('\n', '').split(","))
        for key, value in kwargs.items():
            if key in permitted:
                self.__dict__[key] = value
コード例 #3
0
def test_push_ctree(db_obj, conn_api, load_staging_tree, push_prod_tree,
                    dearchive_file, clean_tmp, del_from_db):
    """
    Exercise 'CurrentTree.push_ctree' with dry=False.

    The staging/production tree paths are built from the DATADIR
    environment variable. The path returned by 'push_ctree' and the name
    of the 'load_staging_tree' file are registered in the
    'dearchive_file' / 'del_from_db' fixtures (presumably for teardown
    cleanup -- verify against the fixture definitions).
    """
    logger = logging.getLogger('test_push_ctree')
    logger.debug("Testing 'push_ctree' function")

    ctree_dir = os.getenv('DATADIR') + "/ctree/"
    CONFIG.set('ctree', 'backup', ctree_dir)

    staging_path = os.getenv('DATADIR') + "/ctree/current.staging.tree"
    prod_path = os.getenv('DATADIR') + "/ctree/current.minus1.tree"
    ctree = CurrentTree(db=db_obj,
                        api=conn_api,
                        staging_tree=staging_path,
                        prod_tree=prod_path)

    pushed_path = ctree.push_ctree(dry=False)

    # register what has to be cleaned up afterwards
    del_from_db.append(load_staging_tree.name)
    dearchive_file.append(pushed_path)
コード例 #4
0
def modify_settings(request):
    """
    Fixture that points the 'ftp_mount' option of the 'ftp' section at
    the absolute path of the DATADIR environment variable and dumps the
    resulting CONFIG into 'settings_m.ini' (relative to the current
    working directory).

    A finalizer is registered on 'request' that removes the generated
    file.

    Returns
    -------
    str : name of the modified settings file ('settings_m.ini')
    """
    settings_path = 'settings_m.ini'

    data_dir = os.path.abspath(os.getenv('DATADIR'))
    CONFIG.set('ftp', 'ftp_mount', data_dir)

    with open(settings_path, 'w') as handle:
        CONFIG.write(handle)

    def _remove_settings():
        # teardown: get rid of the generated settings file
        print(
            '\n[teardown] modify_settings finalizer, deleting modified settings file'
        )
        os.remove(settings_path)

    request.addfinalizer(_remove_settings)

    return settings_path
コード例 #5
0
def test_run_new(db_obj, conn_api, load_changelog_file, push_changelog_file,
                 push_prod_tree, del_from_db, dearchive_file, clean_tmp):
    """
    Exercise 'CurrentTree.run' (dry=False, limit=10) for the case in
    which the staging tree has an additional path with respect to the
    production tree.

    Every path reported in the dict returned by 'run' is registered in
    'dearchive_file'; the 'chlog_details' paths (prefixed with the
    configured 'ftp_mount') and the CHANGELOG file name are registered in
    'del_from_db'.
    """
    logger = logging.getLogger('test_run_new')

    logger.debug("Testing 'run' function when there is an additional path in "
                 "CurrentTree.staging_tree with respect to CurrentTree.prod_tree")

    data_dir = os.getenv('DATADIR')
    ctree = CurrentTree(db=db_obj,
                        api=conn_api,
                        staging_tree=data_dir + "/ctree/current.staging.tree",
                        prod_tree=data_dir + "/ctree/current.minus1.tree")

    # the settings are changed after the CurrentTree object is created
    CONFIG.set('ctree', 'backup', data_dir + "/ctree/")
    CONFIG.set('ctree', 'chlog_fpath', '/ctree/MOCK_CHANGELOG')
    CONFIG.set('ctree', 'chlog_details_dir', '/ftp/changelog_details_test')

    pushed = ctree.run(chlog_f=load_changelog_file.name, limit=10, dry=False)

    # 'chlog_details' holds several paths, the other keys hold a single one
    for key, entry in pushed.items():
        if key != "chlog_details":
            dearchive_file.append(entry)
            continue
        for detail_path in entry:
            dearchive_file.append(detail_path)

    for detail_path in pushed['chlog_details']:
        del_from_db.append(f"{CONFIG.get('ftp', 'ftp_mount')}/{detail_path}")

    del_from_db.append(load_changelog_file.name)
コード例 #6
0
def delete_arch_file(modify_settings, db_obj, conn_api):
    """
    Fixture to delete the moved test file/s
    from DB and to dearchive it from FIRE

    The fixture yields an (initially empty) list. The test appends to it
    the paths that need to be cleaned up; after the test finishes, each
    path is deleted from the DB (looked up by basename) and the matching
    object is deleted from FIRE.

    Parameters
    ----------
    modify_settings : settings file that is (re-)read into CONFIG before
                      the clean up starts.
    db_obj : DB connection object.
    conn_api : API connection object.

    Yields
    ------
    list : list to be filled with the paths to clean up.
    """
    fileList = []
    yield fileList
    print('\n[teardown] delete_arch_file finalizer, deleting file from db')

    CONFIG.read(modify_settings)

    # The mount point is an arbitrary filesystem path, so it is stripped
    # as a literal string: used as a regular expression, characters such
    # as '+', '(' or '.' in the path would be interpreted as regex syntax.
    mount_prefix = CONFIG.get('ftp', 'ftp_mount') + "/"

    for path in fileList:
        basename = os.path.basename(path)
        fObj = db_obj.fetch_file(basename=basename)
        # delete from DB
        db_obj.delete_file(fObj, dry=False)
        # dearchive from FIRE
        fire_path = path.replace(mount_prefix, '')
        fire_o = conn_api.fetch_object(firePath=fire_path)
        conn_api.delete_object(fireOid=fire_o.fireOid, dry=False)

        print(f"[teardown] delete_arch_file finalizer, deleting {path}")
コード例 #7
0
ファイル: api.py プロジェクト: igsr/igsr_archive
    def __init__(self, pwd):
        """
        Constructor

        The API user is read from the 'user' option of the 'fire'
        section of CONFIG.

        Parameters
        ----------
        pwd : str
              Password for API.
        """
        api_logger.debug('Creating an API object')

        fire_user = CONFIG.get('fire', 'user')
        self.user = fire_user
        self.pwd = pwd
コード例 #8
0
ファイル: db.py プロジェクト: igsr/igsr_archive
    def set_conn(self):
        """
        Function that opens a new MySQL connection.

        Host, user and port are read from the 'mysql_conn' section of
        CONFIG; password and database name are taken from the 'pwd' and
        'dbname' attributes of this object. The connection is returned
        to the caller, it is not stored by this function.

        Returns
        -------
        conn: Connection object
        """
        db_logger.debug('Setting connection...')

        # gather the connection settings
        conn_params = {
            'host': CONFIG.get('mysql_conn', 'host'),
            'user': CONFIG.get('mysql_conn', 'user'),
            'password': self.pwd,
            'db': self.dbname,
            'port': CONFIG.getint('mysql_conn', 'port'),
        }
        connection = pymysql.connect(**conn_params)

        db_logger.debug('Connection successful!')

        return connection
コード例 #9
0
ファイル: change_events.py プロジェクト: igsr/igsr_archive
    def print_changelog(self, ifile):
        """
        Function that adds an entry to the CHANGELOG report
        file

        The new entry is written at the top of the file, followed by the
        previous content. It starts with the date taken from 'self.dtime'
        and contains, for each attribute of this object that is a
        non-empty set or dict, the (lower-cased) file types of the paths
        it holds and the location of the matching changelog_details file.

        The file types are de-duplicated after lower-casing and listed in
        alphabetical order, so the generated entry does not depend on the
        iteration order of a set.

        Parameters
        ----------
        ifile : str
                path to CHANGELOG file that will be updated.
                The file needs to exist, as it is opened in 'r+' mode.

        Returns
        -------
        None
        """
        now_str = self.dtime.strftime('%Y-%m-%d')
        now_str1 = self.dtime.strftime('%Y%m%d')

        lines_to_add = now_str + "\n\n"
        for state, value in self.__dict__.items():
            # only attributes that are exactly a set or a dict, and that
            # are not empty, are reported
            if type(value) not in (set, dict) or len(value) == 0:
                continue
            # a File object is created for each path to get its type;
            # lower-case before de-duplicating so that types differing
            # only in case are reported once
            types = sorted({File(name=p).guess_type().lower() for p in value})
            # get the changelog_details dir from config
            dirname = CONFIG.get('ctree', 'chlog_details_dir')
            lines_to_add += "Modification to: {0}\n\n".format(",".join(types))
            lines_to_add += "Details can be found in\n" \
                            "{0}/changelog_details_{1}_{2}\n\n".format(dirname,
                                                                       now_str1, state)
        with open(ifile, 'r+') as f:
            content = f.read()
            # rewind, so the new entry is placed before the old content
            f.seek(0, 0)
            f.write(lines_to_add.rstrip('\r\n') + '\n\n' + content)
コード例 #10
0
    def guess_type(self, xml_dict):
        """
        Function to guess the type of a certain XML ENA response

        The type is the first key of 'xml_dict' once any '_SET'
        substring has been removed from it. It needs to be one of the
        comma-separated values of the 'types' option in the 'ena'
        section of CONFIG.

        Parameters
        ----------
        xml_dict : dict obtained from the ENA response
                   after parsing with xmltodict function

        Returns
        -------
        str : type of ENA XML (RUN, EXPERIMENT, STUDY, SAMPLE, ...)

        Raises
        ------
        Exception
            If the guessed type is not one of the configured types.
        """
        # record types accepted by the configuration: i.e. RUN, EXPERIMENT, ...
        allowed = [t.strip() for t in CONFIG.get('ena', 'types').split(',')]

        root_key = list(xml_dict.keys())[0]
        record_type = root_key.replace('_SET', '')
        if record_type in allowed:
            return record_type

        raise Exception(f"{record_type} is not valid ENA record type")
コード例 #11
0
    def guess_type(self):
        """
        Function to get the type of a file depending on the
        'file_type_rules' section of CONFIG

        The text after the last '.' in the basename of 'self.name' is
        used as the key to look up in the rules (for a basename without
        any '.', the whole basename is used). When there is no rule for
        that key, the value of the 'default' rule is returned.

        Returns
        -------
        str : type of file
        """
        assert CONFIG.has_section(
            'file_type_rules'
        ) is True, "Provide a 'file_type_rules' section in your *.ini file"

        rules = CONFIG._sections['file_type_rules']

        extension = os.path.basename(self.name).rsplit('.', 1)[-1]
        assert extension is not None, f"*.ext could not be obtained from {self.name}"

        # fall back to the default file type for unknown extensions
        rule_key = extension if extension in rules else 'default'
        return rules[rule_key]
コード例 #12
0
ファイル: change_events.py プロジェクト: igsr/igsr_archive
    def update_CHANGELOG(self, chlog_p, db, api, dry=True):
        """
        Function to push the updated CHANGELOG file
        to FIRE. This function will do the following:

        1) Update the CHANGELOG file metadata (md5 and size) in the DB
        2) Retrieve the archived CHANGELOG into a timestamped *.backup
           file placed in the configured 'backup' dir
        3) Delete the old CHANGELOG file from FIRE
        4) Push the new (updated) CHANGELOG file to FIRE

        The backup copy is not removed by this function.

        Parameters
        ----------
        chlog_p : str
                  path to updated CHANGELOG file that will be pushed to FIRE.
        db : DB connection object.
        api : API connection object.
        dry: bool, default=True
             Perform a dry run. The flag is passed on to the DB update,
             the FIRE deletion and the FIRE push.

        Returns
        -------
        str : Fire path of the updated CHANGELOG file
              (the 'chlog_fpath' option of the 'ctree' section)

        Raises
        ------
        Exception
            If the archived CHANGELOG could not be retrieved or fetched.
        """
        # NOTE(review): now() is called on self.dtime, so the timestamp
        # presumably reflects the current time rather than the value held
        # in self.dtime -- confirm this is intended
        timestamp = self.dtime.now().strftime('%Y_%m_%dT%H%M%S')

        # metadata of the updated CHANGELOG
        changelog = File(name=chlog_p)
        changelog.md5 = changelog.calc_md5()
        changelog.size = os.path.getsize(changelog.name)

        # the DB record is identified by the mounted path to CHANGELOG
        ftp_mount = CONFIG.get('ftp', 'ftp_mount')
        fire_path = CONFIG.get('ctree', 'chlog_fpath')
        db_path = f"{ftp_mount}{fire_path}"
        for column, new_value in (('md5', changelog.md5),
                                  ('size', changelog.size)):
            db.update_file(column, new_value, db_path, dry=dry)

        ce_logger.info("Pushing updated CHANGELOG file to API")
        # keep a copy of the archived CHANGELOG, as it needs to be
        # deleted from FIRE before the updated one can be pushed
        backup_path = (f"{CONFIG.get('ctree','backup')}/"
                       f"{os.path.basename(chlog_p)}.{timestamp}.backup")
        retrieved = api.retrieve_object(firePath=fire_path,
                                        outfile=backup_path)
        if retrieved is None:
            raise Exception("No CHANGELOG file retrieved from the archive")

        archived = api.fetch_object(firePath=fire_path)
        if archived is None:
            raise Exception("No CHANGELOG file retrieved from the archive")

        ce_logger.info("Delete CHANGELOG to be updated from the archive")
        api.delete_object(fireOid=archived.fireOid, dry=dry)

        ce_logger.info("Push updated CHANGELOG file to the archive")
        api.push_object(changelog, dry=dry, fire_path=fire_path)

        return f"{fire_path}"
コード例 #13
0
ファイル: db.py プロジェクト: igsr/igsr_archive
    def get_ctree(self, fields, outfile, limit=None):
        """
        Function to dump DB file records and generate
        a current tree file pointed by outfile.
        This function will also parse each of the records
        in the 'file' table and creates a dict with the following information:
        { 'path' : md5 }

        Each dumped line holds the requested fields separated by tabs,
        with an additional constant 'type' column (value 'file') in the
        second position. The 'ftp_mount' prefix is removed from the
        'name' of each record. Records whose name contains the configured
        'staging_mount', or whose first path component is not 'ftp', are
        skipped.

        Parameter
        --------
        fields: list of str
                List with the fields from the 'file' table to
                be dumped. The order of the fields in the dumped
                file will be preserved. It needs to contain 'name' and
                'md5', as these are used to build the returned dict.
                The list is not modified.
        outfile: str
                 File path for the current.same.tree output.
                 The file is created/truncated even if no record is found.
        limit: int, default = None
               Limit current.same.tree file to this int number of records
               If None then (all records will be dumped).

        Return
        ------
        None if the query did not return any record. Otherwise:

        outfile : str
                  path with current.tree.
        data_dict : dict
                    Dict with md5s
                    { 'path' : md5 }
        """
        assert isinstance(fields, list)

        cursor = self.conn.cursor(pymysql.cursors.DictCursor)
        fields_str = ",".join(fields)
        # NOTE(review): 'fields' and 'limit' are interpolated into the
        # query, so they need to come from trusted code
        if limit is None:
            query = f"SELECT {fields_str} FROM file"
        else:
            query = f"SELECT {fields_str} FROM file limit {limit}"

        # columns of the dumped file: built on a copy, so the list passed
        # by the caller does not grow a 'type' entry on every call
        out_fields = fields[:1] + ["type"] + fields[1:]

        data_dict = {}  # dict {'path' : 'md5' }
        # the context manager guarantees that the file is flushed and
        # closed on every exit path, including the early return below
        with open(outfile, 'w') as f:
            cursor.execute(query)
            try:
                result_set = cursor.fetchall()
                if not result_set:
                    db_logger.debug(
                        f"No file retrieved from DB using using query:{query}")
                    return None
                ftp_prefix = CONFIG.get("ftp", "ftp_mount") + "/"
                staging_mount = CONFIG.get("ftp", "staging_mount")
                for row in result_set:
                    row["name"] = row["name"].replace(ftp_prefix, "")
                    if staging_mount in row["name"]:
                        continue
                    # skip files that are in any dir that is not the ftp/ dir,
                    # as these files are not included in the current.tree file
                    if row["name"].split("/")[0] != "ftp":
                        continue
                    row["type"] = "file"
                    data_dict[row["name"]] = row["md5"]
                    f.write("".join(f"{row[k]}\t" for k in out_fields))
                    f.write("\n")
                cursor.close()
                self.conn.commit()
            except pymysql.Error:
                db_logger.error("Exception occurred", exc_info=True)
                # Rollback in case there is any error
                self.conn.rollback()

        return outfile, data_dict
コード例 #14
0
ファイル: current_tree.py プロジェクト: igsr/igsr_archive
    def run(self, chlog_f, dry=True, limit=None):
        """
        Function to perform all operations involved in the comparison
        between the current.tree in the DB and the current.tree in the FTP

        The DB records are dumped to 'self.staging_tree' and compared with
        the records parsed by 'self.get_file_dict()'. When no difference
        is found the staging tree file is removed; otherwise the
        changelog_details files, the CHANGELOG and the new current.tree
        are generated and pushed.

        Parameters
        ----------
        chlog_f:  str
                  Path for CHANGELOG file that will be modified.
        dry: bool, default=True
             If False, then objects will be actually pushed to the archive
             and database will be modified.
        limit: int, optional
               Limit the number of records to retrieve from DB.

        Returns
        -------
        dict or 0
            If there is a ChangeEvents with entries in it then it will generate a dict
            with the following format:

            {'chlog_details' : chlog_details_list,
            'chlog_firepath' : chlog_firepath,
            'ctree_firepath' : ctree_firepath}

            If the ChangeEvents object has size = 0 then it will return 0
        """
        ct_logger.info("Starting CurrentTree.run() process")

        columns = ['name', 'size', 'updated', 'md5']

        ct_logger.info(f"Dumping files from DB to {self.staging_tree}")
        # only the second element ({ 'path' : md5 }) of the result is needed
        dump = self.db.get_ctree(columns,
                                 outfile=self.staging_tree,
                                 limit=limit)
        db_records = dump[1]
        ct_logger.info(f"Number of records dumped: {len(db_records.keys())}")

        ct_logger.info(f"Parsing records in {self.prod_tree}")
        prod_records = self.get_file_dict()
        ct_logger.info(f"Number of records parsed: {len(prod_records.keys())}")

        ct_logger.info(
            f"Looking for differences between {self.staging_tree} and {self.prod_tree}"
        )
        changes = self.cmp_dicts(db_dict=db_records, file_dict=prod_records)
        ct_logger.info(
            f"Looking for differences between {self.staging_tree} and {self.prod_tree}. DONE!"
        )

        # nothing changed: discard the freshly dumped staging tree and stop
        if changes.size() == 0:
            ct_logger.info(
                "No changes detected, nothing will be done. "
                "The current.tree file in the staging area will be removed")
            os.remove(self.staging_tree)
            return 0

        ct_logger.info(
            "Changes detected in the data structures. Proceeding...")
        details_files = changes.print_chlog_details(
            odir=CONFIG.get('ctree', 'temp'))

        ct_logger.info("Pushing changelog_details_* files to archive...")
        pushed_details = changes.push_chlog_details(pathlist=details_files,
                                                    db=self.db,
                                                    api=self.api,
                                                    dry=dry)

        # the CHANGELOG gets its new entry before being pushed
        changes.print_changelog(ifile=chlog_f)
        ct_logger.info(
            "Updating and pushing to archive the updated CHANGELOG file..."
        )
        pushed_chlog = changes.update_CHANGELOG(chlog_f,
                                                db=self.db,
                                                api=self.api,
                                                dry=dry)

        ct_logger.info("Pushing to archive the new current.tree file...")
        pushed_ctree = self.push_ctree(dry=dry)
        ct_logger.info(
            "Pushing to archive the new current.tree file. DONE!")

        return {
            'chlog_details': pushed_details,
            'chlog_firepath': pushed_chlog,
            'ctree_firepath': pushed_ctree
        }