Example #1
def run(selections,args):

    # BEWARE: tricky statement
    #
    # 'upgrade' is a multi-selection subcommand which does the same as the
    # mono-selection 'install' subcommand, but for many selections. What we do
    # here is replace the 'upgrade' subcommand with 'install', so that, now
    # that we are in the 'upgrade' func/context, we can come back to the
    # existing mono-selection func, once per selection, with the 'install'
    # subcommand.
    #
    args.subcommand='install'

    # force non-interactive mode
    args.yes=True

    exclude_selection_files=get_exclude(args)

    for selection in selections:

        if selection.filename in exclude_selection_files:
            continue

        try:
            sdlog.info("SDUPGRAD-003","Process %s.."%selection.filename,stdout=True)
            install(args,selection)
        except sdexception.IncorrectParameterException as e:
            sdlog.error("SDUPGRAD-004","Error occurred while processing %s (%s)"%(selection.filename,str(e)),stderr=True)
        except sdexception.SDException as e:
            sdlog.error("SDUPGRAD-008","Error occurred while processing %s (%s)"%(selection.filename,str(e)),stderr=True)
Example #2
def run(files):
    (keep,reject)=filter(files)

    if len(reject)>0:
        sdlog.info("SDPOSXPC-001","%i anomalies found"%len(reject))

    return keep
Example #3
def qualitycheck_ok(dataset_versions,d):
    """
    based on some statistics, this method accepts or deny 'latest' promotion for the dataset 'd'

    return
        false if 'd' don't seem ready to be promoted to 'latest'
        true  if 'd' seems ready to be promoted to 'latest'
    """

    # retrieve stats for current latest flagged version
    latest_dataset=dataset_versions.get_dataset_with_latest_flag_set()
    current_version_stats=latest_dataset.statistics

    # retrieve stats for candidate version for 'latest' promotion
    candidate_stats=sddatasetquery.get_dataset_stats(d)

    # sanity check: the candidate must not be the version already flagged as 'latest'
    if latest_dataset.dataset_id==d.dataset_id:
        raise SDException("SYDDFLAG-140","fatal error (%i)"%d.dataset_id)

    # variable number quality check
    if candidate_stats['variable_count'] < (current_version_stats['variable_count'] * 0.5) : # if variable number drops
        sdlog.info("SYDDFLAG-730","%s"%d.get_full_local_path())
        return False

    # total file number quality check (disabled)
    #
    #if candidate_stats.getFilesCount() < current_version_stats.getFilesCount(): # if file number decreases
    #    sdlog.info("SYDDFLAG-734","%s"%d.get_full_local_path())
    #    return False

    return True
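A small worked illustration of the variable-count check above (the numbers are made up):

# hypothetical stats, for illustration only
current_version_stats = {'variable_count': 10}
candidate_stats = {'variable_count': 4}

# 4 < 10 * 0.5, i.e. the candidate lost more than half of the variables,
# so qualitycheck_ok() would log SYDDFLAG-730 and return False
assert candidate_stats['variable_count'] < (current_version_stats['variable_count'] * 0.5)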
Example #4
def use_file_timestamp_if_dataset_timestamp_is_missing(d):

    if 'timestamp' not in d:
        # timestamp doesn't exist in ESGF for this dataset

        # hack
        #
        # As the dataset's timestamp is missing in ESGF, we use the timestamp
        # of one of the dataset's files instead (picked arbitrarily, since the
        # files within one dataset do not always share the same timestamp).

        # Note
        #     We do not filter replicas in the query below, in case the master host is not up
        result=sdquicksearch.run(parameter=['limit=1','fields=instance_id,timestamp,type','type=File','dataset_id=%s'%d['instance_id']],post_pipeline_mode=None)
        if len(result.files)>0:
            file=result.files[0]

            if 'timestamp' in file:

                d['timestamp']=file['timestamp']

                sdlog.info("SDTIMEST-001","Dataset timestamp set from one dataset's file's timestamp (dataset_functional_id=%s,file_functional_id=%s)"%(d['instance_id'],file['instance_id']))
            else:
                raise SDException("SDTIMEST-008","Timestamp missing in both dataset and dataset's file(s) (%s)"%d['instance_id'])
        else:
            raise SDException("SDTIMEST-011","Dataset exist in ESGF, but is empty (%s)"%d['instance_id'])
Example #5
def set_timestamp_when_empty__BATCH_MODE_1():
    """
    Retrieve *all* datasets from ESGF, then update local timestamp.

    Not used.
    """
    datasets = sddump.dump_ESGF(parameter=['searchapi_host=esgf-data.dkrz.de'],
                                fields=sdfields.get_timestamp_fields())

    sdlog.info("SDREBUIL-008",
               "%i dataset(s) retrieved from ESGF." % len(datasets))
    sdlog.info("SDREBUIL-012", "Start updating timestamp in local database.")

    for i, d in enumerate(datasets):

        if 'instance_id' in d:  # some datasets have no instance_id in ESGF!
            dataset = sddatasetdao.get_dataset(
                dataset_functional_id=d['instance_id'])
            if dataset is not None:
                if 'timestamp' in d:  # some datasets have no timestamp in ESGF!
                    dataset.timestamp = d['timestamp']
                    sddatasetdao.update_dataset(dataset,
                                                commit=False,
                                                keys=['timestamp'])

        SDProgressBar.print_progress_bar(
            len(datasets), i, title="Updating dataset's timestamp.. ")

    SDProgressBar.progress_complete()

    sddb.conn.commit()
Example #6
def set_timestamp_when_empty__BATCH_MODE_1():
    """
    Retrieve *all* datasets from ESGF, then update local timestamp.

    Not used.
    """
    datasets=sddump.dump_ESGF(['type=Dataset','searchapi_host=esgf-data.dkrz.de'],'timestamp')

    sdlog.info("SDREBUIL-008","%i dataset(s) retrieved from ESGF."%len(datasets))
    sdlog.info("SDREBUIL-012","Start updating timestamp in local database.")

    for i,d in enumerate(datasets):

        if 'instance_id' in d: # some datasets have no instance_id in ESGF!
            dataset=sddatasetdao.get_dataset(dataset_functional_id=d['instance_id'])
            if dataset is not None:
                if 'timestamp' in d: # some datasets have no timestamp in ESGF!
                    dataset.timestamp=d['timestamp']
                    sddatasetdao.update_dataset(dataset,commit=False,keys=['timestamp'])

        SDProgressBar.print_progress_bar(len(datasets),i,title="Updating dataset's timestamp.. ")

    SDProgressBar.progress_complete()

    sddb.conn.commit()
Example #7
    def call_web_service(self, request):

        sdlog.debug("SYDPROXY-100",
                    "Search-API call started (%s)." % request.get_url())

        try:
            response = sdnetutils.call_web_service(
                request.get_url(), timeout=sdconst.SEARCH_API_HTTP_TIMEOUT
            )  # returns Response object
        except:

            # if an exception occurs in the sdnetutils.call_web_service()
            # method, all previous calls to this method inside this paginated
            # call are cancelled as well

            # we reset the offset so the paginated call can be restarted from the beginning next time
            # (maybe overkill, as the offset is reinitialized when entering the 'call_web_service__PAGINATION()' func)
            request.offset = 0

            raise

        sdlog.info(
            "SYDPROXY-100",
            "Search-API call completed (returned-files-count=%i,match-count=%i,url=%s)."
            % (response.count(), response.num_found, request.get_url()))

        return response
Example #8
def update_datasets_status_HELPER(datasets):
    """
    Refresh datasets status flag

    Note
        This func doesn't handle the 'latest' flag
    """
    datasets_modified_count = 0

    for d in datasets:

        # store the dataset's current status
        l__status = d.status

        # compute new 'status' flag
        d.status = compute_dataset_status(d)
        sddatasetdao.update_dataset(d)

        # check if the dataset has changed
        if l__status != d.status:
            sdlog.info(
                "SYDDFLAG-188",
                "Dataset status has been changed from %s to %s (%s)" %
                (l__status, d.status, d.dataset_functional_id))
            datasets_modified_count += 1

    sdlog.info("SYDDFLAG-192",
               "Modified datasets: %i" % datasets_modified_count)
Example #9
def submit(
    order_name,
    project,
    model,
    dataset,
    variable='',
    filename='',
    commit=True
):  # TODO: replace single quote with None and move 'None2SingleQuote' processing inside Event object (and add comment about why we use single quote instead of None in event table !!!)

    event_name = order_name

    dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset)
    filename_pattern = filename

    sdlog.info(
        "SDPPORDE-001",
        "'%s' triggered (%s,%s)" % (event_name, dataset_pattern, variable))

    event = Event(name=event_name)

    event.project = project
    event.model = model
    event.dataset_pattern = dataset_pattern
    event.variable = variable
    event.filename_pattern = filename_pattern
    event.crea_date = sdtime.now()
    event.priority = sdconst.DEFAULT_PRIORITY

    sdeventdao.add_event(event, commit=commit)
Example #10
def immediate_md_delete(tr):
    """Delete file (metadata only)."""
    sdlog.info("SDDELETE-080","Delete metadata (%s)"%tr.get_full_local_path())
    try:
        sdfiledao.delete_file(tr,commit=False)
    except Exception as e:
        sdlog.error("SDDELETE-128","Error occurred during file metadata deletion (%s,%s)"%(tr.get_full_local_path(),str(e)))
Example #11
File: sddb.py Project: Prodiguer/synda
def disconnect():
    global conn

    if is_connected():
        conn.close()

    conn=None

    # hack
    #
    # force sqlite db file to be group writable
    #
    # This should be done with umask when creating the db, but that seems not to work due to a bug.
    #
    # more info
    #   http://www.mail-archive.com/sqlite-users@sqlite.org/msg59080.html
    #   https://code.djangoproject.com/ticket/19292
    #
    if os.path.exists(sdconfig.db_file):
        if not sdtools.is_group_writable(sdconfig.db_file):
            if sdtools.set_file_permission(sdconfig.db_file):
                sdlog.info("SDDATABA-003","File permissions have been modified ('%s')"%sdconfig.db_file)
            else:
                # we get here when the user does not have enough privilege to set the file permissions

                sdlog.info("SDDATABA-004","Missing privilege to modify file permissions ('%s')"%sdconfig.db_file)
Example #12
def qualitycheck_ok(dataset_versions, d):
    """
    based on some statistics, this method accepts or deny 'latest' promotion for the dataset 'd'

    return
        false if 'd' don't seem ready to be promoted to 'latest'
        true  if 'd' seems ready to be promoted to 'latest'
    """

    # retrieve stats for current latest flagged version
    latest_dataset = dataset_versions.get_dataset_with_latest_flag_set()
    current_version_stats = latest_dataset.statistics

    # retrieve stats for candidate version for 'latest' promotion
    candidate_stats = sddatasetquery.get_dataset_stats(d)

    # sanity check: the candidate must not be the version already flagged as 'latest'
    if latest_dataset.dataset_id == d.dataset_id:
        raise SDException("SYDDFLAG-140", "fatal error (%i)" % d.dataset_id)

    # variable number quality check
    if candidate_stats['variable_count'] < (
            current_version_stats['variable_count'] *
            0.5):  # if variable number drops
        sdlog.info("SYDDFLAG-730", "%s" % d.get_full_local_path())
        return False

    # total file number quality check (disabled)
    #
    #if candidate_stats.getFilesCount() < current_version_stats.getFilesCount(): # if file number decreases
    #    sdlog.info("SYDDFLAG-734","%s"%d.get_full_local_path())
    #    return False

    return True
Example #13
def run(args, metadata=None):
    import syndautils

    syndautils.check_daemon()

    if metadata is None:

        # retrieve metadata

        if args.incremental and not args.selection_file:
            print_stderr(
                "ERROR: 'selection_file' option is not set (a selection file must be used when 'incremental' option is set)"
            )
            return (1, 0)

        if args.selection_file is not None:
            sdlog.info("SYNDINST-006", "Process '%s'" % args.selection_file)

        try:
            metadata = syndautils.file_full_search(args)
        except sdexception.EmptySelectionException as e:
            print_stderr('No dataset will be installed, upgraded, or removed.')
            return (0, 0)
        except sdexception.SDException as e:
            sdlog.info("SYNDINST-006",
                       "Exception occurred during installation ('%s')" % str(e))
            raise
Example #14
def get_RTT(remote_host):

    if remote_host not in sdgc.RTT_cache:
        sdlog.info("SDNEARES-012","Compute RTT for '%s' host."%remote_host)
        sdgc.RTT_cache[remote_host]=compute_RTT(remote_host)

    return sdgc.RTT_cache[remote_host]
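The snippet assumes a compute_RTT() helper; a minimal sketch of such a helper (an assumption -- synda's actual implementation may differ) could shell out to ping:

import subprocess

def compute_RTT(remote_host):
    # hypothetical helper: round-trip time in ms from a single ICMP probe
    # (assumes a Unix 'ping'; returns None if the host is unreachable)
    try:
        out = subprocess.check_output(['ping', '-c', '1', remote_host])
    except subprocess.CalledProcessError:
        return None
    for token in out.decode('utf8', 'replace').split():
        if token.startswith('time='):
            return float(token[len('time='):])
    return None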
Example #15
File: sddelete.py Project: Prodiguer/synda
def run(metadata):
    """
    Set files status to "delete"

    Returns:
        Number of deleted items.

    Note
        - the func only changes the status (i.e. data and metadata will be removed later by the daemon)
    """

    if metadata.count() < 1:
        return 0

    f=metadata.get_one_file()
    selection_filename=sdpostpipelineutils.get_attached_parameter__global([f],'selection_filename') # note that if no files are found at all for this selection (no matter the status), then the filename will be blank

    # TODO: merge both to improve perf
    metadata=sdsimplefilter.run(metadata,'status',sdconst.TRANSFER_STATUS_NEW,'remove')
    metadata=sdsimplefilter.run(metadata,'status',sdconst.TRANSFER_STATUS_DELETE,'remove')

    count=metadata.count()

    if count>0:
        po=sdpipelineprocessing.ProcessingObject(delete)
        metadata=sdpipelineprocessing.run_pipeline(metadata,po)
        sddb.conn.commit() # final commit (we do all updates in one transaction).

        sdhistorydao.add_history_line(sdconst.ACTION_DELETE,selection_filename)

        sdlog.info("SDDELETE-929","%i files marked for deletion (selection=%s)"%(count,selection_filename))

    return count
Example #16
def get_datasets_timestamps(squeries,parallel):

    # switch url
    for q in squeries:
        q['url_tmp']=q['url']

        if 'dataset_timestamp_url' not in q:

            sdlog.info("SYNDABTI-300","dataset_timestamp_url not found in query")

            raise MissingDatasetTimestampUrlException() # just in case (should always be set for the 'install' action)

        q['url']=q['dataset_timestamp_url']

    # run
    datasets=sdrun.run(squeries,parallel)

    # transform to dict for quick random access
    di={}
    for d in datasets:
        instance_id=d['instance_id']

        try:
            timestamp=get_timestamp(instance_id,d)
            di[instance_id]=timestamp
        except MissingTimestampException as e:
            sdlog.info("SYNDABTI-500","dataset found but dataset timestamp is missing (%s)"%instance_id)
Example #17
def change_replica(file_functional_id,new_replica,conn=sddb.conn):
    (url,data_node)=new_replica
    sdlog.info("SDMODIFQ-001","Set new replica for %s file (new_url=%s,new_dn=%s)"%(file_functional_id,url,data_node))
    c=conn.cursor()
    res=c.execute("update file set url=?,data_node=? where file_functional_id=?",(url,data_node,file_functional_id))
    conn.commit()
    c.close()
Example #18
def run(files):
    (keep,reject)=filter(files)

    if len(reject)>0:
        sdlog.info("SDPOSXPC-001","%i malformed file(s) found"%len(reject))

    return keep
Example #19
def uniq(metadata):

    if metadata.count() < 1:
        return metadata

    # retrieve global flag
    f=metadata.get_one_file()
    keep_replica=sdpostpipelineutils.get_attached_parameter__global([f],'keep_replica')
    functional_id_keyname=sdpostpipelineutils.get_functional_identifier_name(f)

    if keep_replica=='true':
        # Keep replica.
        # In this case, we remove type-A duplicates, but we keep type-B duplicates (i.e. replicas)

        # uniq key => id (i.e. including datanode)

        sdlog.info("SSHRINKU-001","Remove duplicate..")

        metadata=sdrmdup.run(metadata,functional_id_keyname)
    else:
        # Do not keep replica.
        # In this case, we remove type-A and type-B duplicates by randomly keeping one candidate

        # uniq key => instance_id (i.e. excluding datanode)

        sdlog.info("SSHRINKU-002","Remove duplicate and replicate..")

        metadata=sdrmduprep.run(metadata,functional_id_keyname)

    return metadata
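A short illustration of the two duplicate types handled above (the records are invented; only the keying logic mirrors the snippet):

# hypothetical file records, for illustration only
a = {'instance_id': 'cmip5.output1.MOHC.tas.v1', 'data_node': 'esgf1.example.org'}
b = {'instance_id': 'cmip5.output1.MOHC.tas.v1', 'data_node': 'esgf1.example.org'}
c = {'instance_id': 'cmip5.output1.MOHC.tas.v1', 'data_node': 'esgf2.example.org'}

# a and b are type-A duplicates: same instance_id AND same data node.
# a and c are type-B duplicates (replicas): same instance_id on different data nodes.
# Keying on the full id (instance_id + data node) removes only type-A;
# keying on instance_id alone removes both.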
Example #20
def get_urls(file_functional_id):
    result = sdquicksearch.run(parameter=[
        'limit=1',
        'fields=%s' % url_fields, 'type=File',
        'instance_id=%s' % file_functional_id
    ],
                               post_pipeline_mode=None)
    li = result.get_files()
    if len(li) > 0:
        file_ = li[0]

        # remove non url attributes
        try:
            del file_['attached_parameters']
        except Exception as e:
            pass

        urls = file_

    else:
        sdlog.info(
            "SDNEXTUR-090", "File not found (file_functional_id=%s)" %
            (file_functional_id, ))
        raise sdexception.FileNotFoundException()

    return urls
Example #21
def call_web_service(url,timeout=sdconst.SEARCH_API_HTTP_TIMEOUT,lowmem=False): # default is to load the list resulting from the HTTP call in memory (should work on low-memory machines, as the response should not exceed SEARCH_API_CHUNKSIZE)
    start_time=SDTimer.get_time()
    buf=HTTP_GET(url,timeout)
    elapsed_time=SDTimer.get_elapsed_time(start_time)

    buf=fix_encoding(buf)

    try:
        di=search_api_parser.parse_metadata(buf)
    except Exception as e:

        # If we are here, it is likely that there is a problem with the internet connection
        # (e.g. we are behind an HTTP proxy and have no authorization to use it)

        sdlog.info('SDNETUTI-001','XML parsing error (exception=%s). Most of the time, this error is due to a network error.'%str(e))

        # debug
        #
        # TODO: maybe always enable this
        #
        sdtrace.log_exception()

        # debug
        #
        # If the error is not due to a network problem (e.g. internet
        # connection issue), re-raise the original exception below and set the
        # debug mode to see the stacktrace.
        #
        #raise

        raise SDException('SDNETUTI-008','Network error (see log for details)') # we raise a new 'network error' exception here because, most of the time, an 'xml parsing error' is due to a 'network error'
Example #22
def process_async_event(): # 'async' because events wait in the 'event' table before being processed
    events=sdeventdao.get_events(status=sdconst.EVENT_STATUS_NEW,limit=200) # process 200 events at a time (arbitrary)

    if len(events)>0:

        try:
            sdppproxy.event(events)

            for e in events:
                e.status=sdconst.EVENT_STATUS_OLD

            sdeventdao.update_events(events,commit=False)
            sddb.conn.commit()
            sdlog.info("SYNDTASK-001","Events status succesfully updated")
        except RemoteException,e: # non-fatal
            sddb.conn.rollback()
            sdlog.info("SYNDTASK-002","Error occurs during event processing (%s)"%str(e))
        except Exception,e: # fatal
            sddb.conn.rollback()
            sdlog.error("SYNDTASK-018","Fatal error occurs during event processing (%s)"%str(e))

            # debug
            #traceback.print_exc(file=open(sdconfig.stacktrace_log_file,"a"))

            raise
Example #23
def terminate(signal,frame):
    global quit

    import sdlog

    print # just so the message below is not displayed on the same line as ^C

    sdlog.info("SDTSCHED-004","Shutdown in progress..",stderr=True)

    if scheduler_state!=1: # we can only stop the scheduler if it is running
        sdlog.info("SDTSCHED-009","The daemon is not running (scheduler_state=%s)"%scheduler_state)
        return

    sdwatchdog.quit=1
    quit=1

    # kill all children (i.e. abort running transfers, if any)
    import psutil
    parent = psutil.Process(os.getpid())

    # NEW WAY
    # see TAG54353543DFDSFD for info
    #
    # (psutil renamed get_children() to children() in psutil 2.0, so we support both)
    if hasattr(parent, 'get_children'):
        for child in parent.get_children(True):
            if child.is_running():
                child.terminate()
    else:
        for child in parent.children(True):
            if child.is_running():
                child.terminate()

Example #24
def process_async_event(): # 'async' because events wait in the 'event' table before being processed
    events=sdeventdao.get_events(status=sdconst.EVENT_STATUS_NEW,limit=200) # process 200 events at a time (arbitrary)

    if len(events)>0:

        try:
            sdppproxy.event(events)

            for e in events:
                e.status=sdconst.EVENT_STATUS_OLD

            sdeventdao.update_events(events,commit=False)
            sddb.conn.commit()
            sdlog.info("SYNDTASK-001","Events status succesfully updated")
        except RemoteException,e: # non-fatal
            sddb.conn.rollback()
            sdlog.info("SYNDTASK-002","Error occurs during event processing (%s)"%str(e))
        except Exception,e: # fatal
            sddb.conn.rollback()
            sdlog.error("SYNDTASK-018","Fatal error occurs during event processing (%s)"%str(e))

            # debug
            #sdtrace.log_exception()

            raise
Example #25
def disconnect():
    global conn

    if is_connected():
        conn.close()

    conn = None

    # hack
    #
    # force sqlite db file to be group writable
    #
    # This should be done with umask when creating the db, but that seems not to work due to a bug.
    #
    # more info
    #   http://www.mail-archive.com/sqlite-users@sqlite.org/msg59080.html
    #   https://code.djangoproject.com/ticket/19292
    #
    if os.path.exists(sdconfig.db_file):
        if not sdtools.is_group_writable(sdconfig.db_file):
            if sdtools.set_file_permission(sdconfig.db_file):
                sdlog.info(
                    "SDDATABA-003",
                    "File permissions have been modified ('%s')" %
                    sdconfig.db_file)
            else:
                # we get here when the user does not have enough privilege to set the file permissions

                sdlog.info(
                    "SDDATABA-004",
                    "Missing privilege to modify file permissions ('%s')" %
                    sdconfig.db_file)
Example #26
def load_table_in_memory(tablename, indexname):
    global _in_memory_conn

    sdlog.info("SDDATABA-001", "loading '%s' table" % tablename)

    # create a database in memory
    if _in_memory_conn is None:
        _in_memory_conn = sqlite3.connect(":memory:")

    # attach persistent DB
    _in_memory_conn.execute("ATTACH '%s' AS persistentdb" % get_db_name())

    # drop table if already exists in memory
    _in_memory_conn.execute("drop table if exists main.'%s'" % tablename)
    # copy table from persistent DB to memory
    _in_memory_conn.execute(
        "create table main.'%s' as select * from persistentdb.[%s]" %
        (tablename, tablename))
    # create index
    _in_memory_conn.execute(
        "create index if not exists main.'%s' on '%s' (file)" %
        (indexname, tablename))

    # commit
    _in_memory_conn.commit()

    # detach persistent DB
    _in_memory_conn.execute("detach persistentdb")

    _in_memory_conn.row_factory = sqlite3.Row

    sdlog.info("SDDATABA-002", "table loaded")
Example #27
def change_replica(file_functional_id,new_replica,conn=sddb.conn):
    (url,data_node)=new_replica
    sdlog.info("SDMODIFQ-001","Set new replica for %s file (new_url=%s,new_dn=%s)"%(file_functional_id,url,data_node))
    c=conn.cursor()
    res=c.execute("update file set url=?,data_node=? where file_functional_id=?",(url,data_node,file_functional_id))
    conn.commit()
    c.close()
Example #28
def build_path(name,product,project):
    if project in ('CMIP5','GeoMIP'):
        
        if product is None:
            sdlog.info("SD28TO29-011","Product is None (name=%s)"%name)

            # BEWARE
            # this case is for the 24 GeoMIP datasets without product (select local_image from transfert where product_xml_tag is NULL and project_id=2;)

            path="{0}/{1}".format(project,name)
        else:
            path="{0}/{1}/{2}".format(project,product,name)
    elif project in ('CORDEX',): # note the comma: without it, 'in' would do substring matching on the string 'CORDEX'

        if 'output/' in name:
            # add project
            path="{0}/{1}".format(project,name)

        else:
            # for some datasets of the CORDEX project, project and product are missing, so we add them (product is always 'output' for this project)

            # add output and project
            path="{0}/{1}/{2}".format(project,'output',name)
    else:
        # project is missing

        path="{0}/{1}".format(project,name)

    path=fix_project_name(path) # project names in 28 have been transformed in many columns (in 29, only local_path should have transformed fields)

    return path
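A few hedged examples of the mapping implemented above (the dataset names are invented, and fix_project_name() is assumed to leave them unchanged):

# hypothetical inputs, for illustration only
#
# build_path('MOHC/HadGEM2-ES/rcp45/day/atmos/r1i1p1/v1/tas', 'output1', 'CMIP5')
#   -> 'CMIP5/output1/MOHC/HadGEM2-ES/rcp45/day/atmos/r1i1p1/v1/tas'
#
# build_path('AFR-44/MOHC/ECMWF-ERAINT/evaluation/r1i1p1/tas', None, 'CORDEX')
#   -> 'CORDEX/output/AFR-44/MOHC/ECMWF-ERAINT/evaluation/r1i1p1/tas'   # 'output/' not in name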
Example #29
def get_RTT(remote_host):

    if remote_host not in sdgc.RTT_cache:
        sdlog.info("SDNEARES-012", "Compute RTT for '%s' host." % remote_host)
        sdgc.RTT_cache[remote_host] = compute_RTT(remote_host)

    return sdgc.RTT_cache[remote_host]
Example #30
def load_table_in_memory(tablename,indexname):
    global _in_memory_conn

    sdlog.info("SDDATABA-001","loading '%s' table"%tablename)

    # create a database in memory
    if _in_memory_conn is None:
        _in_memory_conn = sqlite3.connect(":memory:")

    # attach persistent DB
    _in_memory_conn.execute("ATTACH '%s' AS persistentdb"%get_db_name())

    # drop table if already exists in memory
    _in_memory_conn.execute("drop table if exists main.'%s'"%tablename)
    # copy table from persistent DB to memory
    _in_memory_conn.execute("create table main.'%s' as select * from persistentdb.[%s]"%(tablename,tablename))
    # create index
    _in_memory_conn.execute("create index if not exists main.'%s' on '%s' (file)"%(indexname,tablename))

    # commit
    _in_memory_conn.commit()

    # detach persistent DB
    _in_memory_conn.execute("detach persistentdb")  

    _in_memory_conn.row_factory = sqlite3.Row

    sdlog.info("SDDATABA-002","table loaded")
Example #31
File: sdevent.py Project: ncarenton/synda
def variable_complete_event(project,model,dataset,variable):
    sdlog.info("SYDEVENT-002","'variable_complete_event' triggered (%s,%s)"%(dataset.dataset_functional_id,variable))

    # cascade 1
    if dataset.status==sdconst.DATASET_STATUS_COMPLETE:
        dataset_complete_event(project,model,dataset) # trigger 'dataset complete' event

    # cascade 2
    if project=='CMIP5':

        assert '/output/' not in dataset.path

        (ds_path_output1,ds_path_output2)=sdproduct.get_output12_dataset_paths(dataset.path)
        if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):

            d1=sddatasetdao.get_dataset(path=ds_path_output1)
            d2=sddatasetdao.get_dataset(path=ds_path_output2)

            if sdvariable.is_variable_complete(d1.dataset_id,variable) and sdvariable.is_variable_complete(d2.dataset_id,variable):
                dataset_pattern=sdproduct.build_output12_dataset_pattern(dataset.local_path)
                variable_complete_output12_event(project,model,dataset_pattern,variable) # trigger event (cross dataset event)
        else:
            # we also trigger the 'variable_complete_output12_event' event if the variable spans only one product (because with only one product, the output12 event holds as well)

            dataset_pattern=sdproduct.build_output12_dataset_pattern(dataset.local_path)
            variable_complete_output12_event(project,model,dataset_pattern,variable) # trigger event (cross dataset event)
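sdproduct.get_output12_dataset_paths() is assumed to return the 'output1' and 'output2' variants of a dataset path; a hedged illustration (the path is invented):

# hypothetical path, for illustration only
#
# get_output12_dataset_paths('CMIP5/output1/MOHC/HadGEM2-ES/rcp45/v1')
#   -> ('CMIP5/output1/MOHC/HadGEM2-ES/rcp45/v1',
#       'CMIP5/output2/MOHC/HadGEM2-ES/rcp45/v1')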
Example #32
def run_helper(queries):
    """
    notes
      - "queries" is not threadsafe (i.e. not a Queue), but that doesn't matter, as threads do not use it
    """
    total_query_to_process=len(queries)

    sdlog.debug("SDPROXMT-003","%d search-API queries to process (max_thread_per_host=%d,timeout=%d)"%(total_query_to_process,max_thread_per_host,sdconst.SEARCH_API_HTTP_TIMEOUT))

    while True:
        if sdconfig.proxymt_progress_stat:
            sdlog.info("SDPROXMT-033","threads per host: %s"%",".join(['%s=%s'%(host,len(searchAPIServices[host]['threadlist'])) for host in searchAPIServices.keys()]))

        if len(queries)>0:
            distribute_queries(queries)
        else:
            # leave the loop only if all threads completed
            if all_threads_completed():
                break

        # remove completed threads from list
        for host in searchAPIServices.keys():
            li=[]
            for t in searchAPIServices[host]['threadlist']:
                if t.is_alive():
                    li.append(t)
            searchAPIServices[host]['threadlist']=li

        # log
        total_query_already_processed = total_query_to_process - len(queries)
        if total_query_to_process > 0: # display progress only when there is something to process
            if len(queries) > 0: # display progress only while queries are still waiting
                sdlog.info("SDPROXMT-004","total_queries=%d, running_or_done_queries=%d, waiting_queries=%d"%(total_query_to_process,total_query_already_processed,len(queries)))

        # if all services are busy, we sleep to limit loop speed
        # (note that all the code around the "sleep" call is to detect system overload)
        sleep_time=10
        warning_threshold=5 # threshold so we don't emit a warning for every small overshoot
        befo=time.time()
        time.sleep(sleep_time)
        afte=time.time()
        diff=afte-befo
        if diff>sleep_time+warning_threshold:
            sdlog.warning("SDPROXMT-005","WARNING: system overload detected (sleep takes %d second to complete)."%diff)

    # retrieve result from output queue
    metadata=sdtypes.Metadata()
    while not __result_queue.empty():
        success=__result_queue.get(False) # retrieve result from ONE successful search-API call
        success.connect() # TAGKLK434L3K34K
        metadata.slurp(success) # warning: success is modified here

    # retrieve error from output queue and insert them into a list
    errors=[]
    while not __error_queue.empty():
        query=__error_queue.get(False)
        errors.append(query)

    return (metadata,errors)
Example #33
def add_selection(us):
    # compute selection checksum from scratch
    l__file_checksum=sdutils.compute_checksum(us.get_selection_file_full_path())

    if not exists_selection(us):
        # add selection in database if missing

        us.set_checksum(l__file_checksum)
        us.set_status(sdconst.SELECTION_STATUS_NEW)
        us.set_fullscan(True)

        insertSelection(us) # warning: this modifies the us object (sets the PK)

    else:
        # selection already in database

        from_db_us=fetch_selection(us.get_filename()) # retrieve us from DB
        us.set_selection_id(from_db_us.get_selection_id())                  # copy DB id

        # check if same checksums
        if l__file_checksum==from_db_us.get_checksum():
            # same checksum

            # retrieve status
            us.set_status(from_db_us.get_status())
            us.set_checksum(from_db_us.get_checksum())

            if us.get_status()==sdconst.SELECTION_STATUS_NORMAL:

                # nothing to do here (let (a) and (b) decide if we need fullscan)
                pass

            elif us.get_status()==sdconst.SELECTION_STATUS_MODIFIED:

                us.set_fullscan(True)

            elif us.get_status()==sdconst.SELECTION_STATUS_NEW:

                us.set_fullscan(True)

            else:

                raise SDException("SYNDATSEL-071","unknown status")

        else:
            # checksums differ

            sdlog.info("SYNDASEL-197","%s selection has been modified (marked for fullscan)"%us.get_filename())


            us.set_checksum(l__file_checksum)                  # update checksum
            us.set_status(sdconst.SELECTION_STATUS_MODIFIED) # update status

            update_selection(us)

    # add selection in selection list
    # TODO
    _selections[us.get_filename()]=us
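The logic above hinges on sdutils.compute_checksum(); a minimal sketch of such a helper (an assumption -- the real implementation and digest type may differ) is:

import hashlib

def compute_checksum(path):
    # hypothetical helper: md5 digest of a file, read in chunks
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            h.update(chunk)
    return h.hexdigest()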
Example #34
def change_priority(u_s,new_priority,conn=sddb.conn): # 'u_s' is the selection whose files get the new priority (missing from the original signature, which left it undefined)
    """Change priority value for already existing transfer."""
    c=conn.cursor()
    sdlog.info("SDMODIFQ-002","updating %s selection (new priority=%s)"%(u_s.filename,new_priority))
    res=c.execute("UPDATE file SET priority = ? WHERE EXISTS (SELECT 1 FROM selection__file WHERE file.file_id = selection__file.file_id AND selection__file.selection_id = ?)",(new_priority,u_s.get_selection_id(),))
    modified_files_count=c.rowcount
    conn.commit()
    c.close()
Example #35
def reset():
    import sddeletedataset

    nbr = sddeletequery.purge_error_and_waiting_transfer()
    sddeletedataset.purge_orphan_datasets()

    sdlog.info("SDDELETE-931", "%i transfer(s) removed" % nbr)
    return nbr
Example #36
def start_watchdog():
    """Starting download processes watchdog."""

    sdlog.info("SDTSCHED-993", "Starting watchdog..")

    frozenCheckerThread = sdwatchdog.FrozenDownloadCheckerThread()
    frozenCheckerThread.setDaemon(True)
    frozenCheckerThread.start()
Example #37
def add_file(f):
    sdlog.info("SDENQUEU-003","Create transfer (local_path=%s,url=%s)"%(f.get_full_local_path(),f.url))

    f.dataset_id=add_dataset(f)
    f.status=sdconst.TRANSFER_STATUS_WAITING
    f.crea_date=sdtime.now()

    sdfiledao.add_file(f,commit=False)
Example #38
def start_watchdog():
    """Starting download processes watchdog."""

    sdlog.info("SDTSCHED-993","Starting watchdog..")

    frozenCheckerThread=sdwatchdog.FrozenDownloadCheckerThread()
    frozenCheckerThread.setDaemon(True)
    frozenCheckerThread.start()
Example #39
def reset():
    import sddeletedataset

    nbr=sddeletequery.purge_error_and_waiting_transfer()
    sddeletedataset.purge_orphan_datasets()

    sdlog.info("SDDELETE-931","%i transfer(s) removed"%nbr)
    return nbr
Example #40
File: sdrun.py Project: Prodiguer/synda
def sequential_exec(queries):
    search=sdproxy.SearchAPIProxy()
    metadata=sdtypes.Metadata()
    for i,q in enumerate(queries):
        sdlog.info("SYNDARUN-001","Process query %d"%i)
        result=search.run(url=q['url'],attached_parameters=q.get('attached_parameters'))
        metadata.slurp(result)
    return metadata
Example #41
def change_priority(u_s,new_priority,conn=sddb.conn): # 'u_s' is the selection whose files get the new priority (missing from the original signature, which left it undefined)
    """Change priority value for already existing transfer."""
    c=conn.cursor()
    sdlog.info("SDMODIFQ-002","updating %s selection (new priority=%s)"%(u_s.filename,new_priority))
    res=c.execute("UPDATE file SET priority = ? WHERE EXISTS (SELECT 1 FROM selection__file WHERE file.file_id = selection__file.file_id AND selection__file.selection_id = ?)",(new_priority,u_s.get_selection_id(),))
    modified_files_count=c.rowcount
    conn.commit()
    c.close()
Example #42
def renew_certificate(host,port,username,password):

    sdlog.info("SDMYPROX-002","Renew certificate..")

    # we need a mkdir here to prevent 'No such file or directory' myproxyclient error (see TAGFERE5435 for more info)
    sd=sdconfig.get_security_dir()
    if not os.path.isdir(sd):
        os.makedirs(sd)

    # currently, we set the bootstrap option every time
    #
    # TODO: change this to set only the first time (i.e. if .esg/certificates is empty)
    #
    bootstrap=True

    # currently, we set the trustroots option every time
    updateTrustRoots=True
    authnGetTrustRootsCall=False


    # TODO: maybe add an option in 'synda certificate' to specify another path for cadir (for debugging purposes)
    #ROOT_TRUSTROOT_DIR = '/etc/grid-security/certificates'
    #USER_TRUSTROOT_DIR = '~/.globus/certificates'


    # set env.

    os.environ['ESGF_CREDENTIAL']=sdconfig.esgf_x509_proxy
    os.environ['ESGF_CERT_DIR']=sdconfig.esgf_x509_cert_dir
    os.environ['X509_CERT_DIR']=sdconfig.esgf_x509_cert_dir

    if 'X509_USER_PROXY' in os.environ: 
        del os.environ['X509_USER_PROXY']
    #if 'GLOBUS_LOCATION' in os.environ:
    #    del os.environ['GLOBUS_LOCATION']


    # main

    myproxy_clnt = MyProxyClient(hostname=host,
                                 port=port,
                                 caCertDir=sdconfig.esgf_x509_cert_dir,
                                 proxyCertLifetime=43200) # 12 hours

    # credname=credname
    creds=myproxy_clnt.logon(username, password,
                             bootstrap=bootstrap,
                             updateTrustRoots=updateTrustRoots,
                             authnGetTrustRootsCall=authnGetTrustRootsCall)


    # store cert on disk

    fout = open(sdconfig.esgf_x509_proxy, 'w')
    for cred in creds:
        fout.write(cred)
    fout.close()
Example #43
def sequential_exec(queries):
    search = sdproxy.SearchAPIProxy()
    metadata = sdtypes.Metadata()
    for i, q in enumerate(queries):
        sdlog.info("SYNDARUN-001", "Process query %d" % i)
        result = search.run(url=q['url'],
                            attached_parameters=q.get('attached_parameters'))
        metadata.slurp(result)
    return metadata
Example #44
        def inner(*args,**kwargs):

            start_time=SDTimer.get_time()
            result = func(*args,**kwargs)
            elapsed_time=SDTimer.get_elapsed_time(start_time)

            sdlog.info('SDPROFIL-001','%s ran in %2.9f sec' %(func.__name__,elapsed_time))

            return result
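This snippet is only the inner wrapper of a timing decorator; a minimal sketch of the enclosing decorator (the name log_elapsed_time is an assumption, since only inner appears above) would be:

def log_elapsed_time(func):  # hypothetical name for the enclosing decorator
    def inner(*args,**kwargs):
        start_time=SDTimer.get_time()
        result = func(*args,**kwargs)
        elapsed_time=SDTimer.get_elapsed_time(start_time)
        sdlog.info('SDPROFIL-001','%s ran in %2.9f sec' %(func.__name__,elapsed_time))
        return result
    return inner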
Example #45
def install(args,selection):

    # TODO: maybe force type=file here, in case the selection file has 'type=Dataset'

    if not args.dry_run:
        sdlog.info("SDUPGRAD-001","Retrieve metadata from ESGF..")
        metadata=sdsearch.run(selection=selection)
        sdlog.info("SDUPGRAD-002","Install files..")
        (status,newly_installed_files_count)=sdinstall.run(args,metadata)
Example #46
def event(events):
    try:
        sdlog.info("SDPPPROX-001","Push events to postprocessing")
        serialized_events=[e.__dict__ for e in events] # transform the list of events into a list of dicts (needed because custom classes cannot be serialized to JSON)
        get_service().event(serialized_events) # send events
        sdlog.info("SDPPPROX-002","%i events successfully transmitted to postprocessing"%len(serialized_events))
    except urllib2.URLError as e:
        sdlog.error("SDPPPROX-010","Network error occurred (url=%s,port=%s,%s)"%(url,port,str(e)))
        raise RemoteException("SDPPPROX-100","Network error occurred")
Example #47
        def inner(*args, **kwargs):

            start_time = SDTimer.get_time()
            result = func(*args, **kwargs)
            elapsed_time = SDTimer.get_elapsed_time(start_time)

            sdlog.info('SDPROFIL-001',
                       '%s ran in %2.9f sec' % (func.__name__, elapsed_time))

            return result
Example #48
def add_file(f):
    sdlog.info(
        "SDENQUEU-003", "Create transfer (local_path=%s,url=%s)" %
        (f.get_full_local_path(), f.url))

    f.dataset_id = add_dataset(f)
    f.status = sdconst.TRANSFER_STATUS_WAITING
    f.crea_date = sdtime.now()

    sdfiledao.add_file(f, commit=False)
Example #49
def immediate_md_delete(tr):
    """Delete file (metadata only)."""
    sdlog.info("SDDELETE-080",
               "Delete metadata (%s)" % tr.get_full_local_path())
    try:
        sdfiledao.delete_file(tr, commit=False)
    except Exception as e:
        sdlog.error(
            "SDDELETE-128",
            "Error occurred during file metadata deletion (%s,%s)" %
            (tr.get_full_local_path(), str(e)))
Example #50
def update_datasets_status():
    """
    Update status flag for all datasets.

    Notes
        - This func is used to fix inconsistencies.
        - This func doesn't handle the 'latest' flag
    """
    sdlog.info("SYDDFLAG-186", "Update status for all datasets")
    datasets = sddatasetdao.get_datasets()
    update_datasets_status_HELPER(datasets)
Example #51
def renew_certificate(host, port, username, password):

    sdlog.info("SDMYPROX-002", "Renew certificate..")

    # we need a mkdir here to prevent 'No such file or directory' myproxyclient error (see TAGFERE5435 for more info)
    sd = sdconfig.get_security_dir()
    if not os.path.isdir(sd):
        os.makedirs(sd)

    # currently, we set the bootstrap option every time
    #
    # TODO: change this to set only the first time (i.e. if .esg/certificates is empty)
    #
    bootstrap = True

    # currently, we set the trustroots option every time
    updateTrustRoots = True
    authnGetTrustRootsCall = False

    # TODO: maybe add an option in 'synda certificate' to specify another path for cadir (for debugging purposes)
    #ROOT_TRUSTROOT_DIR = '/etc/grid-security/certificates'
    #USER_TRUSTROOT_DIR = '~/.globus/certificates'

    # set env.

    os.environ['ESGF_CREDENTIAL'] = sdconfig.esgf_x509_proxy
    os.environ['ESGF_CERT_DIR'] = sdconfig.esgf_x509_cert_dir
    os.environ['X509_CERT_DIR'] = sdconfig.esgf_x509_cert_dir

    if 'X509_USER_PROXY' in os.environ:
        del os.environ['X509_USER_PROXY']
    #if 'GLOBUS_LOCATION' in os.environ:
    #    del os.environ['GLOBUS_LOCATION']

    # main

    myproxy_clnt = MyProxyClient(hostname=host,
                                 port=port,
                                 caCertDir=sdconfig.esgf_x509_cert_dir,
                                 proxyCertLifetime=43200)  # 12 hours

    # credname=credname
    creds = myproxy_clnt.logon(username,
                               password,
                               bootstrap=bootstrap,
                               updateTrustRoots=updateTrustRoots,
                               authnGetTrustRootsCall=authnGetTrustRootsCall)

    # store cert on disk

    fout = open(sdconfig.esgf_x509_proxy, 'w')
    for cred in creds:
        fout.write(cred)
    fout.close()
Example #52
def upgrade_db(conn,current_db_version,new_db_version):
    versions = upgrade_procs.keys()
    li=sddbversionutils.version_range( versions, current_db_version, new_db_version )

    # remove the first value (i.e. no upgrade needed there as db is already at this version)
    li=li[1:]

    for v in li:
        if v not in upgrade_procs:
            raise SDException("SDDBVERS-318","Incorrect database version: cannot upgrade database (version=%s)."%(v,))
        else:
            upgrade_procs[v](conn)
            sdlog.info("SDDBVERS-319","Database updated to version %s"%(v,))
Example #53
def reset_datasets_flags():
    """Reset dataset status and latest flag from scratch for all datasets."""
    count = 0

    sdlog.info("SYDDFLAG-933",
               "recalculate status and latest flag for all dataset..", True)

    sdmodifyquery.wipeout_datasets_flags(
    )  # we reset all flags before starting the main processing (we clean everything to start from scratch)

    count = update_datasets__status_and_latest()
    while count > 0:
        count = update_datasets__status_and_latest()
Example #54
def update_complete_datasets_status():
    """
    Update status flag for datasets with complete status.

    Notes
        - This func is used to fix inconsistencies, when a dataset has 'complete' status
          but some of its files are not 'done' yet.
        - This func doesn't handle the 'latest' flag
    """
    sdlog.info("SYDDFLAG-184", "Update complete datasets status")
    complete_datasets = sddatasetdao.get_datasets(
        status=sdconst.DATASET_STATUS_COMPLETE)
    update_datasets_status_HELPER(complete_datasets)