Example 1
def _wrapper(*args, **kwargs):
    #inner wrapper of a decorator: acquire a lock on each harvester's client
    #before calling the decorated function `func` from the enclosing scope
    from . import podstatus_harvester
    from . import containerstatus_harvester
    from . import containerlog_harvester
    with LockSession(podstatus_harvester.get_client(),
                     3600 * 3) as lock_session1:
        with LockSession(containerstatus_harvester.get_client(),
                         3600 * 3) as lock_session2:
            with LockSession(containerlog_harvester.get_client(),
                             3600 * 3) as lock_session3:
                func(*args, **kwargs)
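
The examples only show LockSession being consumed; the class itself is not included. A minimal, hypothetical sketch of what these call sites assume: a session constructed with a client or repository, an expiry in seconds and an optional renew interval, usable as a context manager, with renew(), renew_if_needed() and release() methods. The acquire_lock/renew_lock/release_lock calls on the client below are assumptions, not the actual API.

import time

class LockSession(object):
    """Hypothetical sketch of the lock session the examples rely on.

    The client/repository is assumed to expose acquire_lock/renew_lock/
    release_lock; the real API is not shown in these examples. Acquiring
    may raise exceptions.AlreadyLocked, which the harvest examples catch.
    """
    def __init__(self, client, expire, renew_interval=None):
        self._client = client
        self._expire = expire
        self._renew_interval = renew_interval or expire // 2
        self._client.acquire_lock(expire=self._expire)   #assumed client method
        self._last_renewed = time.time()

    def __enter__(self):
        return self

    def renew(self):
        self._client.renew_lock(expire=self._expire)     #assumed client method
        self._last_renewed = time.time()

    def renew_if_needed(self):
        #renew only when the configured interval has elapsed
        if time.time() - self._last_renewed >= self._renew_interval:
            self.renew()

    def release(self):
        self._client.release_lock()                      #assumed client method

    def __exit__(self, exc_type, exc_value, tb):
        self.release()
        return False
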
Example 2
def clean_orphan_resources(repository):
    with LockSession(repository, 3600, 3000) as lock_session:
        all_resourceids = set()
        for meta in repository.resource_metadatas(
                throw_exception=False,
                current_resource=True,
                resource_status=ResourceConstant.ALL_RESOURCE):
            all_resourceids.add(meta["resource_id"])

        total = len(all_resourceids)
        logger.info("Found {} resources".format(total))

        lock_session.renew()
        data_path = repository.resource_data_path
        if data_path[-1] != "/":
            data_path = "{}/".format(data_path)

        orphan_resources = []
        for resource in repository.storage.list_resources(data_path):
            name = resource.name[len(data_path):]
            if name not in all_resourceids:
                orphan_resources.append(resource.name)

        logger.info("Found {} orphan resources".format(len(orphan_resources)))

        for resource in orphan_resources:
            repository.storage.delete(resource)
            logger.info(
                "Delete orphan resource '{}' from repository".format(resource))

        logger.info("Deleted {} orphan resources".format(
            len(orphan_resources)))
Example 3
def archive():
    with LockSession(get_resource_repository(), 3600, 3000) as lock_session:
        #archive the latest files
        files.archive(get_resource_repository(),
                      folder=settings.ARCHIVE_FOLDER,
                      recursive=True,
                      reserve_folder=settings.RESERVE_FOLDER,
                      archive=False,
                      file_filter=need_archive)
        #clean expired deleted resources from storage
        files.clean_expired_deleted_resources(get_resource_repository(),
                                              DELETED_RESROURCE_EXPIRED)
Example 4
def harvest(reconsume=False):
    with LockSession(get_resource_consume_client(), 3000) as lock_session:
        if not reconsume:
            #check whether some nginx configuration has been changed after last consuming.
            if get_resource_consume_client().is_behind(
                    resources=["nginx-config.yml", "nginx.yml"]):
                reconsume = True
            else:
                return 0

        #consume nginx config file
        return get_resource_consume_client().consume(
            process_nginx,
            resources=["nginx-config.yml", "nginx.yml"],
            reconsume=reconsume)
Example 5
def clean_resources(repository, delete_resource_filter, batch=None):
    """
    clean resources which is satisified with delete_resource_filter
    """

    logger.info("Begin to find all deleted resources")
    delete_resourceids = set()

    with LockSession(repository, 3600, 3000) as lock_session:
        total_resources = 0
        for meta in repository.resource_metadatas(
                throw_exception=False,
                current_resource=True,
                resource_status=ResourceConstant.ALL_RESOURCE):
            total_resources += 1
            if delete_resource_filter(meta):
                delete_resourceids.add(meta["resource_id"])

        total = len(delete_resourceids)
        logger.info("Found {}/{} deleted resources".format(
            total, total_resources))

        lock_session.renew()

        deleted = 0
        while deleted < total:
            with MetadataSession() as session:
                while deleted < total:
                    resourceid = delete_resourceids.pop()
                    repository.delete_resource(resourceid,
                                               permanent_delete=True)
                    logger.info(
                        "Permanently delete the file({}) from repository because it matches the delete filter condition"
                        .format(resourceid))
                    deleted += 1
                    lock_session.renew_if_needed()
                    if batch and deleted % batch == 0:
                        break
        logger.info("Permanently delete {}/{} resources".format(
            deleted, total))

        clean_orphan_resources(repository)
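
clean_resources takes delete_resource_filter as a callable that receives a resource metadata dict, but none of the examples define one. A minimal, hypothetical filter reusing the metadata keys from Example 6; everything below is illustrative, and timezone is assumed to be django.utils.timezone as elsewhere in these examples.

from datetime import timedelta
from django.utils import timezone

def expired_delete_filter(meta, expire_time=timedelta(days=30)):
    #hypothetical delete_resource_filter: select resources whose recorded
    #delete time (same key as in Example 6) is older than expire_time
    delete_time = meta.get(ResourceConstant.DELETE_TIME_KEY)
    return delete_time is not None and timezone.now() > delete_time + expire_time

#usage sketch:
#clean_resources(repository, expired_delete_filter, batch=100)
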
Example 6
def clean_expired_deleted_resources(repository, expire_time):
    """
    clean resources which is satisified with deleted_resource_filter
    """

    logger.info("Begin to find all expired deleted resources")
    expired_resourceids = set()
    with LockSession(repository, 3600, 3000) as lock_session:
        total_resources = 0
        now = timezone.now()
        for meta in repository.resource_metadatas(
                throw_exception=False,
                current_resource=True,
                resource_status=ResourceConstant.DELETED_RESOURCE):
            total_resources += 1
            if ResourceConstant.DELETE_TIME_KEY in meta and now > meta[
                    ResourceConstant.DELETE_TIME_KEY] + expire_time:
                expired_resourceids.add(meta["resource_id"])

        lock_session.renew()

        total = len(expired_resourceids)
        logger.info("Found {}/{} expired deleted resources".format(
            total, total_resources))

        deleted = 0

        with MetadataSession() as session:
            for resourceid in expired_resourceids:
                repository.delete_resource(resourceid, permanent_delete=True)
                logger.debug(
                    "Permanently delete the file({}) from repository because its delete time has expired"
                    .format(resourceid))
                deleted += 1
                lock_session.renew_if_needed()
        logger.info("Permanently delete {}/{} resources".format(
            deleted, total))
Example 7
def harvest(reconsume=False):
    try:
        with LockSession(
                get_consume_client(),
                settings.NGINXLOG_MAX_CONSUME_TIME_PER_LOG) as lock_session:
            if reconsume and get_consume_client().is_client_exist(
                    clientid=settings.RESOURCE_CLIENTID):
                get_consume_client().delete_clients(
                    clientid=settings.RESOURCE_CLIENTID)

            if reconsume:
                WebAppAccessLog.objects.all().delete()
                WebAppAccessDailyLog.objects.all().delete()

            context = {"reconsume": reconsume, "lock_session": lock_session}
            #apply the latest filter change first
            context["path_normalizers"] = list(
                RequestPathNormalizer.objects.filter(
                    order__gt=0).order_by("-order"))
            context["path_filter"] = RequestPathNormalizer.objects.filter(
                order=0).first()
            context["parameter_filters"] = list(
                RequestParameterFilter.objects.all().order_by("-order"))
            context["path_normalizer_map"] = {}
            context["parameter_filter_map"] = {}
            """
            don't apply the changed rules in the history data
            applied = False
            while not applied:
                context["path_normalizers"] = list(RequestPathNormalizer.objects.filter(order__gt=0).order_by("-order"))
                context["path_filter"] = RequestPathNormalizer.objects.filter(order=0).first()
                context["path_normalizer_map"] = {}
                context["parameter_filters"] = list(RequestParameterFilter.objects.all().order_by("-order"))
                context["parameter_filter_map"] = {}
                applied = apply_rules(context)
            """

            #consume nginx access log files
            result = get_consume_client().consume(process_log(context))
            #populate daily log
            lock_session.renew()
            #populate daily report
            WebAppAccessDailyReport.populate_data(lock_session)

            now = timezone.localtime()
            if now.hour >= 0 and now.hour <= 2:
                obj = WebAppAccessLog.objects.all().order_by(
                    "-log_starttime").first()
                if obj:
                    last_log_datetime = timezone.localtime(obj.log_starttime)
                    earliest_log_datetime = timezone.make_aware(
                        datetime(last_log_datetime.year,
                                 last_log_datetime.month,
                                 last_log_datetime.day)) - timedelta(
                                     days=settings.NGINXLOG_ACCESSLOG_LIFETIME)
                    sql = "DELETE FROM nginx_webappaccesslog where log_starttime < '{}'".format(
                        earliest_log_datetime.strftime(
                            "%Y-%m-%d 00:00:00 +8:00"))
                    with connection.cursor() as cursor:
                        logger.info(
                            "Delete expired web app access log.last_log_datetime={}, sql = {}"
                            .format(last_log_datetime, sql))
                        cursor.execute(sql)
                    lock_session.renew()

                obj = WebAppAccessDailyLog.objects.all().order_by(
                    "-log_day").first()
                if obj:
                    last_log_day = obj.log_day
                    earliest_log_day = last_log_day - timedelta(
                        days=settings.NGINXLOG_ACCESSDAILYLOG_LIFETIME)
                    sql = "DELETE FROM nginx_webappaccessdailylog where log_day < date('{}')".format(
                        earliest_log_day.strftime("%Y-%m-%d"))
                    with connection.cursor() as cursor:
                        logger.info(
                            "Delete expired web app access daily log.last_log_day={}, sql = {}"
                            .format(last_log_day, sql))
                        cursor.execute(sql)

            return result
    except exceptions.AlreadyLocked as ex:
        msg = "The previous harvest process is still running.{}".format(
            str(ex))
        logger.info(msg)
        return ([], [(None, None, None, msg)])
Example 8
def harvest(reconsume=None,max_harvest_files=None,context={}):
    need_clean = [False]

    def _post_consume(client_consume_status,consume_result):
        now = timezone.localtime()
        if "next_clean_time" not in client_consume_status:
            client_consume_status["next_clean_time"] = timezone.make_aware(datetime(now.year,now.month,now.day)) + timedelta(days=1)
        elif now.hour > 6:
            return
        elif now >= client_consume_status["next_clean_time"]:
            need_clean[0] = True
            client_consume_status["next_clean_time"] = timezone.make_aware(datetime(now.year,now.month,now.day)) + timedelta(days=1)

    now = timezone.now()
    harvester = models.Harvester(name=harvestername,starttime=now,last_heartbeat=now,status=models.Harvester.RUNNING)
    harvester.save()
    message = None
    try:
        with LockSession(get_client(),settings.CONTAINERSTATUS_MAX_CONSUME_TIME_PER_LOG) as lock_session:
            try:
                if reconsume and get_client().is_client_exist(clientid=settings.RESOURCE_CLIENTID):
                    get_client().delete_clients(clientid=settings.RESOURCE_CLIENTID)
        
                context["containerstatus"] = context.get("containerstatus",{})
                context["containerstatus"] = {
                    "reconsume":reconsume  if reconsume is not None else context["containerstatus"].get("reconsume",False),
                    "max_harvest_files":max_harvest_files if max_harvest_files is not None else context["containerstatus"].get("max_harvest_files",None),
                    "lock_session":lock_session,
                    "new_deployed_workloads":set(),
                    "terminated_containers":set(),
                    "containers":{},
                    "harvester":harvester,
                    "harvested_files": 0
                }
                context["resourceclients"] = context.get("resourceclients",{})
                context["clusters"] = context.get("clusters",{})
                context["namespaces"] = context.get("namespaces",{})
                context["workloads"] = context.get("workloads",{})

                #consume container status file
                result = get_client().consume(process_status(context),f_post_consume=_post_consume)
                #change the status of containers that have had no status data harvested in the recent half hour
                if result[1]:
                    if result[0]:
                        message = """Failed to harvest container status,
        {} container status files were consumed successfully.
        {}
        {} container status files were failed to consume
        {}"""
                        message = message.format(
                            len(result[0]),
                            "\n        ".join(["Succeed to harvest container status file '{}'".format(resource_ids) for resource_status,resource_status_name,resource_ids in result[0]]),
                            len(result[1]),
                            "\n        ".join(["Failed to harvest container status '{}'.{}".format(resource_ids,msg) for resource_status,resource_status_name,resource_ids,msg in result[1]])
                        )
                    else:
                        message = """Failed to harvest container status,{} container status files were failed to consume
        {}"""
                        message = message.format(
                            len(result[1]),
                            "\n        ".join(["Failed to harvest container status file '{}'.{}".format(resource_ids,msg) for resource_status,resource_status_name,resource_ids,msg in result[1]])
                        )
                elif result[0]:
                    message = """Succeed to harvest container status, {} container status files were consumed successfully.
        {}"""
                    message = message.format(
                        len(result[0]),
                        "\n        ".join(["Succeed to harvest container status file '{}'".format(resource_ids) for resource_status,resource_status_name,resource_ids in result[0]])
                    )
                else:
                    message = "Succeed to harvest container status, no new container status file was added since last harvesting"
    
    
                harvester.status = models.Harvester.FAILED if result[1] else models.Harvester.SUCCEED
            
                try:
                    if "last_archive_time" in context:
                        for container in models.Container.objects.filter(status__in=("Waiting",'Running'),last_checked__lt=context["last_archive_time"] - timedelta(minutes=30)):
                            container.status="LostHeartbeat"
                            container.save(update_fields=["status"])
                            update_latest_containers(context,container)
            
                    #save workload
                    for workload,workload_update_fields in context["workloads"].values():
                        if workload_update_fields:
                            workload.save(update_fields=workload_update_fields)
    
                except:
                    harvester.status = models.Harvester.FAILED
                    msg = "Failed to save changed Containers or Workloads.{}".format(traceback.format_exc())
                    logger.error(msg)
                    message = """{}
    =========Consuming Results================
    {}""".format(msg,message)
    
                return result
            except:
                harvester.status = models.Harvester.FAILED
                message = "Failed to harvest container status.{}".format(traceback.format_exc())
                logger.error(message)
                return ([],[(None,None,None,message)])
    except exceptions.AlreadyLocked as ex:
        harvester.status = models.Harvester.SKIPPED
        message = "The previous harvest process is still running.{}".format(str(ex))
        logger.warning(message)
        return ([],[(None,None,None,message)])
    finally:
        if need_clean[0]:
            try:
                check_aborted_containers(harvester,context)
                clean_expired_containers(harvester)
                message = """Succeed to clean expired containers.
{}""".format(message)
            except:
                harvester.status = models.Harvester.FAILED
                msg = "Failed to clean expired containers.{}".format(traceback.format_exc())
                logger.error(msg)
                message = """{}
=========Consuming Results================
{}""".format(msg,message)
        harvester.message = message
        harvester.endtime = timezone.now()
        harvester.last_heartbeat = harvester.endtime
        harvester.save(update_fields=["endtime","message","status","last_heartbeat"])
Example 9
def harvest(reconsume=None,max_harvest_files=None,context={}):
    need_clean = [False]

    def _post_consume(client_consume_status,consume_result):
        now = timezone.localtime()
        if "next_clean_time" not in client_consume_status:
            client_consume_status["next_clean_time"] = timezone.make_aware(datetime.datetime(now.year,now.month,now.day)) + datetime.timedelta(days=1)
        elif now.hour > 6:
            return
        elif now >= client_consume_status["next_clean_time"]:
            need_clean[0] = True
            client_consume_status["next_clean_time"] = timezone.make_aware(datetime.datetime(now.year,now.month,now.day)) + datetime.timedelta(days=1)

    now = timezone.now()
    harvester = models.Harvester(name=harvestername,starttime=now,last_heartbeat=now,status=models.Harvester.RUNNING)
    harvester.save()
    message = None
    try:
        with LockSession(get_client(),settings.CONTAINERLOG_MAX_CONSUME_TIME_PER_LOG) as lock_session:
            try:
                if reconsume:
                    if get_client().is_client_exist(clientid=settings.RESOURCE_CLIENTID):
                        get_client().delete_clients(clientid=settings.RESOURCE_CLIENTID)
                    modeldata.clean_containerlogs()
        
        
                context["logstatus"] = context.get("logstatus",{})
                context["logstatus"] = {
                    "reconsume":reconsume  if reconsume is not None else context["logstatus"].get("reconsume",False),
                    "max_harvest_files":max_harvest_files if max_harvest_files is not None else context["logstatus"].get("max_harvest_files",None),
                    "lock_session":lock_session,
                    "containerlogs":{},
                    "harvester":harvester,
                    "containers":{},
                    "harvested_files": 0
                }
                context["resourceclients"] = context.get("resourceclients",{})
                context["clusters"] = context.get("clusters",{})
                context["workloads"] = context.get("workloads",{})
                #consume container log file
                result = get_client().consume(process_status(context),f_post_consume=_post_consume)
        
                if result[1]:
                    if result[0]:
                        message = """Failed to harvest container log,
        {} container log files were consumed successfully.
        {}
        {} container log files were failed to consume
        {}"""
                        message = message.format(
                            len(result[0]),
                            "\n        ".join(["Succeed to harvest container log file '{}'".format(resource_ids) for resource_status,resource_status_name,resource_ids in result[0]]),
                            len(result[1]),
                            "\n        ".join(["Failed to harvest container log '{}'.{}".format(resource_ids,msg) for resource_status,resource_status_name,resource_ids,msg in result[1]])
                        )
                    else:
                        message = """Failed to harvest container log,{} container log files were failed to consume
        {}"""
                        message = message.format(
                            len(result[1]),
                            "\n        ".join(["Failed to harvest container log file '{}'.{}".format(resource_ids,msg) for resource_status,resource_status_name,resource_ids,msg in result[1]])
                        )
                elif result[0]:
                    message = """Succeed to harvest container log, {} container log files were consumed successfully.
        {}"""
                    message = message.format(
                        len(result[0]),
                        "\n        ".join(["Succeed to harvest container log file '{}'".format(resource_ids) for resource_status,resource_status_name,resource_ids in result[0]])
                    )
                else:
                    message = "Succeed to harvest container log, no new container log file was added since last harvesting"
            
                harvester.status = models.Harvester.FAILED if result[1] else models.Harvester.SUCCEED
                return result
            except:
                harvester.status = models.Harvester.FAILED
                message = "Failed to harvest container log.{}".format(traceback.format_exc())
                logger.error(message)
                return ([],[(None,None,None,message)])
    except exceptions.AlreadyLocked as ex: 
        harvester.status = models.Harvester.SKIPPED
        message = "The previous harvest process is still running.{}".format(str(ex))
        logger.warning(message)
        return ([],[(None,None,None,message)])
    finally:
        if need_clean[0]:
            try:
                clean_expired_containerlogs(harvester)
                message = """Succeed to clean expired containers.
=========Consuming Results================
{}""".format(message)
            except:
                harvester.status = models.Harvester.FAILED
                msg = "Failed to clean expired container logs.{}".format(traceback.format_exc())
                logger.error(msg)
                message = """{}
=========Consuming Results================
{}""".format(msg,message)

        harvester.message = message
        harvester.endtime = timezone.now()
        harvester.last_heartbeat = harvester.endtime
        harvester.save(update_fields=["endtime","message","status","last_heartbeat"])
Example 10
def harvest(reconsume=None,max_harvest_files=None,context={}):
    now = timezone.now()
    harvester = models.Harvester(name=harvestername,starttime=now,last_heartbeat=now,status=models.Harvester.RUNNING)
    harvester.save()
    message = None
    try:
        with LockSession(get_client(),settings.PODSTATUS_MAX_CONSUME_TIME_PER_LOG) as lock_session:
            if reconsume and get_client().is_client_exist(clientid=settings.RESOURCE_CLIENTID):
                get_client().delete_clients(clientid=settings.RESOURCE_CLIENTID)

            context["podstatus"] = context.get("podstatus",{})
            context["podstatus"].update({
                "reconsume":reconsume  if reconsume is not None else context["podstatus"].get("reconsume",False),
                "max_harvest_files":max_harvest_files if max_harvest_files is not None else context["podstatus"].get("max_harvest_files",None),
                "lock_session":lock_session,
                "removable_workloads":set(),
                "orphan_namespaces":set(),
                "harvester":harvester,
                "harvested_files": 0
            })

            context["clusters"] = context.get("clusters",{})
            context["namespaces"] = context.get("namespaces",{})
            context["workloads"] = context.get("workloads",{})

            #consume pod status file
            result = get_client().consume(process_status(context))
    
            if result[1]:
                if result[0]:
                    message = """Failed to harvest pod status,
    {} pod status files were consumed successfully.
    {}
    {} pod status files were failed to consume
    {}"""
                    message = message.format(
                        len(result[0]),
                        "\n        ".join(["Succeed to harvest pod status file '{}'".format(resource_ids) for resource_status,resource_status_name,resource_ids in result[0]]),
                        len(result[1]),
                        "\n        ".join(["Failed to harvest pod status '{}'.{}".format(resource_ids,msg) for resource_status,resource_status_name,resource_ids,msg in result[1]])
                    )
                else:
                    message = """Failed to harvest pod status,{} pod status files were failed to consume
    {}"""
                    message = message.format(
                        len(result[1]),
                        "\n        ".join(["Failed to harvest pod status file '{}'.{}".format(resource_ids,msg) for resource_status,resource_status_name,resource_ids,msg in result[1]])
                    )
            elif result[0]:
                message = """Succeed to harvest pod status, {} pod status files were consumed successfully.
    {}"""
                message = message.format(
                    len(result[0]),
                    "\n        ".join(["Succeed to harvest pod status file '{}'".format(resource_ids) for resource_status,resource_status_name,resource_ids in result[0]])
                )
            else:
                message = "Succeed to harvest pod status, no new pod status file was added since last harvesting"
                
            harvester.status = models.Harvester.FAILED if result[1] else models.Harvester.SUCCEED
            return result

    except exceptions.AlreadyLocked as ex:
        harvester.status = models.Harvester.SKIPPED
        message = "The previous harvest process is still running.{}".format(str(ex))
        logger.warning(message)
        return ([],[(None,None,None,message)])
    except:
        harvester.status = models.Harvester.FAILED
        message = "Failed to harvest pod status.{}".format(traceback.format_exc())
        logger.error(message)
        return ([],[(None,None,None,message)])
    finally:
        harvester.message = message
        harvester.endtime = timezone.now()
        harvester.last_heartbeat = harvester.endtime
        harvester.save(update_fields=["endtime","message","status","last_heartbeat"])
Example 11
def sync_dependent_tree(workload_changetime=None,
                        cluster_lock_sessions=None,
                        rescan=False,
                        rescan_resource=False,
                        rescan_dependency=False):
    """
    Sync the dependent tree if required.
    This function is synchronized against the rancher configuration storage 
    cluster_lock_sessions is a list of tuple(cluster, cluster_lock_session)
    workload_changetime if not none, the workloads which were changed after latest_workload_changetime will be processed
    """
    from .rancher_harvester import get_client
    release_lock = False
    try:
        if not cluster_lock_sessions:
            cluster_lock_sessions = []
            release_lock = True
            for cluster in models.Cluster.objects.filter(added_by_log=False):
                cluster_lock_sessions.append(
                    (cluster, LockSession(get_client(cluster.name), 3000,
                                          1500)))

        def _renew_locks():
            for cluster, lock_session in cluster_lock_sessions:
                lock_session.renew_if_needed()

        scan_time = timezone.now()
        scan_modules = list(
            models.EnvScanModule.objects.filter(
                active=True).order_by("-priority"))
        qs = models.Workload.objects.filter(
            cluster__in=[o[0] for o in cluster_lock_sessions])
        if workload_changetime:
            qs = qs.filter(
                Q(updated__gte=workload_changetime)
                | Q(deleted__gte=workload_changetime))
        qs = qs.order_by("cluster__name", "namespace__name", "name")

        wl_cache = {}
        dependency_cache = {}
        wls = []
        #scan resources if required
        for wl in qs:
            logger.debug("Scan resource for workload({}<{}>)".format(
                wl, wl.id))
            try:
                wl.scan_resource(rescan=rescan_resource,
                                 scan_modules=scan_modules,
                                 scan_time=scan_time)
            except:
                logger.error(
                    "Failed to scan the resource of the workload({}).{}".
                    format(wl, traceback.format_exc()))

            _renew_locks()
            wl_cache[wl.id] = wl
            wls.append(wl)

        #rescan dependency if required
        for wl in wls:
            logger.debug("Scan dependency for workload({}<{}>)".format(
                wl, wl.id))
            wl.scan_dependency(rescan=rescan_dependency,
                               f_renew_lock=_renew_locks)

        # repopulate the dependent tree
        dep_wlids = set()
        dep_wls = []
        now = timezone.now()
        for wl in wls:
            dependency_cache.clear()
            update_workload_dependent_tree(wl,
                                           wl_cache=wl_cache,
                                           dependency_cache=dependency_cache,
                                           renew_locks=_renew_locks)
            update_resource_dependent_tree(wl,
                                           wl_cache=wl_cache,
                                           dependency_cache=dependency_cache,
                                           renew_locks=_renew_locks)
    finally:
        #release the locks
        if release_lock:
            for cluster, lock_session in cluster_lock_sessions:
                try:
                    lock_session.release()
                except Exception as ex:
                    logger.error("Failed to release the lock.{}".format(
                        str(ex)))
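
The docstring above notes that callers may pass pre-acquired cluster_lock_sessions, in which case sync_dependent_tree does not release them. A hedged usage sketch under that assumption; the wrapper function name is illustrative, while models, LockSession and get_client are the names used in the example itself.

from .rancher_harvester import get_client

def rescan_all_clusters():
    #acquire the per-cluster locks up front and hand them to sync_dependent_tree;
    #because we pass them in, we are responsible for releasing them
    cluster_lock_sessions = [
        (cluster, LockSession(get_client(cluster.name), 3000, 1500))
        for cluster in models.Cluster.objects.filter(added_by_log=False)
    ]
    try:
        sync_dependent_tree(cluster_lock_sessions=cluster_lock_sessions,
                            rescan_resource=True,
                            rescan_dependency=True)
    finally:
        for _, lock_session in cluster_lock_sessions:
            try:
                lock_session.release()
            except Exception as ex:
                logger.error("Failed to release the lock.{}".format(str(ex)))
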
Example 12
def archive(repository,
            files=None,
            folder=None,
            recursive=False,
            file_filter=None,
            reserve_folder=True,
            archive=True,
            checking_policy=[FILE_MD5]):
    """
    Archive the files or files in folder and push it to azure blob resource
    files: the file or list of files for archive
    folder: all the files in the folder will be archived
    recursive: only used for folder, if true, all the files in the folder and nested folder will be archived.
    file_filter: only used for folder, if not none, only the files which satisfy the filter will be archived
    reserve_folder: only used for folder, if true, the relative folder in folder will be reserved when push to repository
    archive: if true, each file version will be saved in repository
    checking_policy: the policy to check whether file is modified or not. can be single policy or list of policy
    """

    if not files and not folder:
        raise Exception("Either files or folder must be specified.")

    if files and folder:
        raise Exception("Can't set both files and folder at the same time")

    if not checking_policy:
        checking_policy = [FILE_MD5]
    elif not isinstance(checking_policy, (list, tuple)):
        checking_policy = [checking_policy]
    check_md5 = FILE_MD5 in checking_policy

    with LockSession(repository, 3600, 3000) as lock_session:
        with MetadataSession() as session:
            if files:
                if not isinstance(files, (tuple, list)):
                    archive_files = [(os.path.abspath(files),
                                      os.path.split(files)[1])]
                else:
                    archive_files = [(os.path.abspath(f), os.path.split(f)[1])
                                     for f in files]

                #check whether each file exists or not.
                for f, resource_id in archive_files:
                    if not os.path.exists(f):
                        raise Exception("File {} does not exist".format(f))
                    elif not os.path.isfile(f):
                        raise Exception("{} is not a file".format(f))
                    else:
                        _archive_file(repository, f, resource_id,
                                      checking_policy, check_md5)
                        lock_session.renew_if_needed()
            else:
                non_exist_resourceids = {}
                for meta in repository.resource_metadatas(
                        throw_exception=False,
                        current_resource=True,
                        resource_status=ResourceConstant.ALL_RESOURCE):
                    non_exist_resourceids[meta["resource_id"]] = meta.get(
                        ResourceConstant.DELETED_KEY, False)

                folder = os.path.abspath(folder)
                folders = [folder]
                f_path = None
                resource_id = None
                while folders:
                    cur_folder = folders.pop(0)
                    for f in os.listdir(cur_folder):
                        f_path = os.path.join(cur_folder, f)
                        if os.path.isfile(f_path):
                            if not file_filter or file_filter(
                                    os.path.relpath(f_path, folder)):
                                if reserve_folder:
                                    resource_id = os.path.relpath(
                                        f_path, folder)
                                else:
                                    resource_id = os.path.split(f_path)[1]
                                _archive_file(repository,
                                              f_path,
                                              resource_id,
                                              checking_policy,
                                              check_md5,
                                              metadata={"folder": folder})
                                lock_session.renew_if_needed()
                                if resource_id in non_exist_resourceids:
                                    del non_exist_resourceids[resource_id]
                            else:
                                pass
                                #logger.debug("File({}) is filtered out by file filter,ignore".format(f_path))

                        elif os.path.isdir(f_path):
                            if recursive:
                                folders.append(f_path)
                            else:
                                logger.debug(
                                    "Recursive is False and {} is a sub folder,ignore"
                                    .format(f_path))

                        else:
                            logger.debug(
                                "{} is neither a regular file nor a folder, ignore"
                                .format(f_path))

                for resourceid, is_deleted in non_exist_resourceids.items():
                    if not file_filter or file_filter(resourceid):
                        if not is_deleted:
                            repository.delete_resource(resourceid,
                                                       permanent_delete=False)
                            lock_session.renew_if_needed()
                            logger.debug(
                                "Logically delete the file({}) from repository because it doesn't exist anymore"
                                .format(resourceid))
                    else:
                        repository.delete_resource(resourceid,
                                                   permanent_delete=True)
                        logger.debug(
                            "Permanently delete the file({}) from repository because it doesn't meet the filter condition"
                            .format(resourceid))
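
The parameter combinations described in the docstring, shown as a hedged usage sketch; repository, the paths and the lambda filter below are placeholders for illustration only.

#archive a single file, keeping every version in the repository
archive(repository, files="/etc/nginx/nginx.conf")

#mirror a folder tree while skipping hidden files; files that disappeared
#from disk are logically deleted from the repository, as in the folder branch above
archive(repository,
        folder="/var/www/site",
        recursive=True,
        reserve_folder=True,
        archive=False,
        file_filter=lambda relpath: not relpath.startswith("."),
        checking_policy=[FILE_MD5])
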