def append_process(xmldoc, program = None, version = None, cvs_repository = None, cvs_entry_time = None, comment = None, is_online = False, jobid = 0, domain = None, ifos = None):
	"""
	Build a new process-table row describing the running process and
	append it to the process table found in xmldoc.  If xmldoc has no
	process table yet, a new one is created and attached to the
	document's first child node.

	program, version, cvs_repository, comment and domain are stored
	as given.  cvs_entry_time, when not None and not the empty
	string, is parsed first as "YYYY-MM-DD HH:MM:SS +0000" and, if
	that fails, as "YYYY/MM/DD HH:MM:SS"; the result is passed
	through _UTCToGPS() before being stored.  is_online is stored as
	an int, jobid as given, and ifos is assigned to the row's
	instruments attribute.

	The host name, user name (None if get_username() raises
	KeyError), process ID and current time are recorded as well.
	The new row is returned.

	See also register_to_xmldoc().
	"""
	# find the process table, or make one if the document lacks it
	try:
		table = lsctables.ProcessTable.get_table(xmldoc)
	except ValueError:
		table = lsctables.New(lsctables.ProcessTable)
		xmldoc.childNodes[0].appendChild(table)
	table.sync_next_id()

	row = table.RowType()
	for column, value in (("program", program), ("version", version), ("cvs_repository", cvs_repository)):
		setattr(row, column, value)

	# FIXME:  remove the "" case when the git versioning business is
	# sorted out
	if cvs_entry_time is not None and cvs_entry_time != "":
		try:
			# git_version style time stamp
			entry_time = _UTCToGPS(time.strptime(cvs_entry_time, "%Y-%m-%d %H:%M:%S +0000"))
		except ValueError:
			# old cvs style time stamp
			entry_time = _UTCToGPS(time.strptime(cvs_entry_time, "%Y/%m/%d %H:%M:%S"))
	else:
		entry_time = None
	row.cvs_entry_time = entry_time

	row.comment = comment
	row.is_online = int(is_online)
	row.node = socket.gethostname()

	try:
		username = get_username()
	except KeyError:
		username = None
	row.username = username

	row.unix_procid = os.getpid()
	row.start_time = _UTCToGPS(time.gmtime())
	row.end_time = None
	for column, value in (("jobid", jobid), ("domain", domain), ("instruments", ifos)):
		setattr(row, column, value)

	# the ID is drawn last, immediately before the row is stored
	row.process_id = table.get_next_id()
	table.append(row)
	return row
Esempio n. 2
0
def append_process(xmldoc, program = None, version = None, cvs_repository = None, cvs_entry_time = None, comment = None, is_online = False, jobid = 0, domain = None, ifos = None):
	"""
	Build a new process-table row describing the running process and
	append it to the process table found in xmldoc.  If xmldoc has no
	process table yet, a new one is created and attached to the
	document's first child node.

	program, version, cvs_repository, comment and domain are stored
	as given.  cvs_entry_time, when not None and not the empty
	string, is parsed first as "YYYY-MM-DD HH:MM:SS +0000" and, if
	that fails, as "YYYY/MM/DD HH:MM:SS"; the result is passed
	through _UTCToGPS() before being stored.  is_online is stored as
	an int, jobid as given, and ifos is assigned to the row's
	instruments attribute.

	The host name, user name (None if get_username() raises
	KeyError), process ID and current time are recorded as well.
	The new row is returned.

	See also register_to_xmldoc().
	"""
	# find the process table, or make one if the document lacks it
	try:
		table = lsctables.ProcessTable.get_table(xmldoc)
	except ValueError:
		table = lsctables.New(lsctables.ProcessTable)
		xmldoc.childNodes[0].appendChild(table)
	table.sync_next_id()

	row = table.RowType()
	for column, value in (("program", program), ("version", version), ("cvs_repository", cvs_repository)):
		setattr(row, column, value)

	# FIXME:  remove the "" case when the git versioning business is
	# sorted out
	if cvs_entry_time is not None and cvs_entry_time != "":
		try:
			# git_version style time stamp
			entry_time = _UTCToGPS(time.strptime(cvs_entry_time, "%Y-%m-%d %H:%M:%S +0000"))
		except ValueError:
			# old cvs style time stamp
			entry_time = _UTCToGPS(time.strptime(cvs_entry_time, "%Y/%m/%d %H:%M:%S"))
	else:
		entry_time = None
	row.cvs_entry_time = entry_time

	row.comment = comment
	row.is_online = int(is_online)
	row.node = socket.gethostname()

	try:
		username = get_username()
	except KeyError:
		username = None
	row.username = username

	row.unix_procid = os.getpid()
	row.start_time = _UTCToGPS(time.gmtime())
	row.end_time = None
	for column, value in (("jobid", jobid), ("domain", domain), ("instruments", ifos)):
		setattr(row, column, value)

	# the ID is drawn last, immediately before the row is stored
	row.process_id = table.get_next_id()
	table.append(row)
	return row
Esempio n. 3
0
def append_process_gpssane(xmldoc, program = None, version = None, cvs_repository = None, cvs_entry_time = None, comment = None, is_online = False, jobid = 0, domain = None, ifos = None):
    """
    Build a new process-table row describing the running process and
    append it to the process table found in xmldoc, creating the table
    under the document's first child node if it does not exist yet.

    Unlike a string time stamp that needs parsing, cvs_entry_time is
    stored exactly as passed in; the caller is expected to supply a GPS
    time already (a 9 or 10 digit number).  program, version,
    cvs_repository, comment and domain are stored as given, is_online
    is stored as an int, jobid as given, and ifos is handed to the
    row's set_ifos() method.

    The host name, user name (None if get_username() raises KeyError),
    process ID and current time are recorded as well.  The new row is
    returned.

    See also register_to_xmldoc().
    """
    # Find the process table, or make one if the document lacks it.
    try:
        table = lsctables.ProcessTable.get_table(xmldoc)
    except ValueError:
        table = lsctables.New(lsctables.ProcessTable)
        xmldoc.childNodes[0].appendChild(table)
    table.sync_next_id()

    row = table.RowType()
    for column, value in (("program", program), ("version", version), ("cvs_repository", cvs_repository)):
        setattr(row, column, value)

    # No string parsing here: the input is required to be a GPS time.
    row.cvs_entry_time = cvs_entry_time
    row.comment = comment
    row.is_online = int(is_online)
    row.node = socket.gethostname()

    try:
        username = get_username()
    except KeyError:
        username = None
    row.username = username

    row.unix_procid = os.getpid()
    # NOTE(review): the original author suspected this GPS start time is
    # wrong because of time zone handling -- unconfirmed, needs checking
    # against _UTCToGPS().
    row.start_time = _UTCToGPS(time.gmtime())
    row.end_time = None
    row.jobid = jobid
    row.domain = domain
    row.set_ifos(ifos)

    # The ID is drawn last, immediately before the row is stored.
    row.process_id = table.get_next_id()
    table.append(row)
    return row
Esempio n. 4
0
def set_process_end_time(process):
	"""
	Stamp a process table row with the current time as its end time.

	The current UTC time is converted with _UTCToGPS() and stored in
	process.end_time.  The same row object is returned so that calls
	can be chained.
	"""
	now_utc = time.gmtime()
	process.end_time = _UTCToGPS(now_utc)
	return process
Esempio n. 5
0
def _api_version_at_least(reported, required):
    """
    Return True when the dotted version string `reported` is numerically
    greater than or equal to `required`.

    Plain string comparison sorts "2.1.9" after "2.1.15", so every
    dot-separated component is compared as an integer instead, with the
    shorter version padded with zeros.  If either value cannot be parsed
    that way, the raw `>=` comparison used historically is applied.
    """
    try:
        reported_parts = [int(part) for part in reported.strip().split('.')]
        required_parts = [int(part) for part in required.strip().split('.')]
    except (AttributeError, TypeError, ValueError):
        return reported >= required
    width = max(len(reported_parts), len(required_parts))
    reported_parts += [0] * (width - len(reported_parts))
    required_parts += [0] * (width - len(required_parts))
    return reported_parts >= required_parts


def InsertMultipleDQXMLFileThreaded(filenames,
                                    logger,
                                    server='http://slwebtest.virgo.infn.it',
                                    hackDec11=True,
                                    debug=True,
                                    threads=1,
                                    testing_options=None):
    """
    Inserts multiple dqxml files of data into the DQSEGDB.

    Every file is parsed with setupSegment_md(); its segment_definer,
    process, process_params, segment_summary and segment tables are
    folded into one flag-version object per (ifo, name, version), shared
    across all files.  After the last file the insert histories are
    coalesced and each flag version is sent to the server.

    - filenames is a list of string filenames for DQXML files; it must
      not be empty.
    - logger is used for info/debug messages.
    - server is the base URL; the API version is queried from it first
      and decides whether the newer comment fields (>= 2.1.0) and the
      'comment' insertion_metadata key (>= 2.1.15, otherwise 'uri') are
      used.
    - hackDec11 is used to turn off good features that the server doesn't
      yet support: when true the process start time is stored under
      'process_start_time', otherwise under 'process_start_timestamp'.
    - debug enables extra debug logging, some printing and a timing
      message.
    - threads: when greater than 1, flag versions are queued for worker
      threads running threadedPatchWithFailCases(); otherwise
      patchWithFailCases() is called for each one in turn.
    - testing_options is a dictionary including (optionally): offset
      (int, added to every segment and segment_summary start/end time),
      synchronize (time in 'HH:MM' format (string)).  Only 'offset' is
      read here; the dictionary is passed on to patchWithFailCases() in
      the non-threaded case.  None is treated as an empty dictionary.

    returns True if it completes sucessfully
    raises ValueError if filenames is empty
    """
    logger.info(
        "Beginning call to InsertMultipleDQXMLFileThreaded.  This message last updated April 14 2015, Ciao da Italia!"
    )
    # Reject an empty file list before any network or parsing work is done.
    if len(filenames) < 1:
        message = "Empty file list sent to InsertMultipleDQXMLFileThreaded"
        print(message)
        raise ValueError(message)

    from threading import Thread
    try:
        from Queue import Queue  # Python 2 module name
    except ImportError:
        from queue import Queue  # Python 3 module name
    import sys

    # A None default avoids sharing one mutable dict between calls.
    if testing_options is None:
        testing_options = {}

    # Make a call to server+'/dq':
    protocol = server.split(':')[0]
    serverfqdn = server.split('/')[-1]
    apiResult = queryAPIVersion(protocol, serverfqdn, False)
    # If the API change results in a backwards incompatibility, handle it
    # here with a flag that affects behavior below.
    # S6 style comments are needed from 2.1.0 on; older servers must not
    # be sent the extra comments.
    new_comments = _api_version_at_least(apiResult, "2.1.0")
    # Alteration to insertion_metadata from uri to comment to accomodate
    # s6 data conversion
    use_new_insertion_metadata = _api_version_at_least(apiResult, "2.1.15")

    if 'offset' in testing_options:
        offset = int(testing_options['offset'])
    else:
        offset = 0

    xmlparser = pyRXP.Parser()
    lwtparser = ldbd.LIGOLwParser()

    # Keyed by (ifo, name, version); shared across all input files.
    flag_versions = {}

    for filename in filenames:

        segment_md = setupSegment_md(filename, xmlparser, lwtparser, debug)

        # Turn every segment_definer row into a {column name: value} dict,
        # keyed by row number.
        flag_versions_numbered = {}
        definer_table = segment_md.table['segment_definer']
        for j, row in enumerate(definer_table['stream']):
            flag_versions_numbered[j] = {}
            for i, entry in enumerate(definer_table['orderedcol']):
                flag_versions_numbered[j][entry] = row[i]

        # parse process table and make a dict that corresponds with each
        # process, where the keys for the dict are like "process:process_id:1"
        # so that we can match these to the flag_versions from the segment
        # definer in the next section

        # Note:  Wherever temp_ preceeds a name, it is generally an identifier
        # field from the dqxml, that is only good for the single dqxml file
        # being parsed

        process_dict = {}
        process_table = segment_md.table['process']
        for row in process_table['stream']:
            process_id_index = process_table['orderedcol'].index('process_id')
            temp_process_id = row[process_id_index]
            process_info = {}
            process_dict[temp_process_id] = process_info
            for i, entry in enumerate(process_table['orderedcol']):
                process_info[entry] = row[i]
            # Map the raw process columns onto the process_metadata layout.
            metadata = {}
            process_info['process_metadata'] = metadata
            if hackDec11:
                metadata['process_start_time'] = process_info['start_time']
            else:  # This is for the newer server APIs:  (April 24 2015 we checked it (it probably changed before ER6 finally))
                metadata['process_start_timestamp'] = process_info[
                    'start_time']
            if new_comments:
                process_info['process_comment'] = process_info['comment']
            metadata['uid'] = process_info['username']
            # Filled from the process_params table below, when present.
            metadata['args'] = []
            metadata['pid'] = process_info['unix_procid']
            metadata['name'] = process_info['program']
            ### Fix!!! Improvement: not really fqdn, just the node name
            metadata['fqdn'] = process_info['node']

        # So now I have process_dict[temp_process_id]['process_metadata'] for
        # each process_id, and can add it to a flag version when it uses it;
        # really I should group it with the segment summary info because that
        # has the insertion_metadata start and stop time

        # The process_params table is optional; when it exists, append each
        # row's param and value (as strings) to the owning process's args.
        try:
            params_table = segment_md.table['process_params']
            params_rows = params_table['stream']
            len(params_rows)
        except Exception:
            logger.info("No process_params table for file: %s" % filename)
        else:
            for row in params_rows:
                process_id_index = params_table['orderedcol'].index(
                    'process_id')
                temp_process_params_process_id = row[process_id_index]
                # Loop through the columns in each row and toss out
                # everything but the param and value entries.
                for i, entry in enumerate(params_table['orderedcol']):
                    if entry == "param":
                        temp_param = str(row[i])
                    if entry == "value":
                        temp_value = str(row[i])
                args = process_dict[temp_process_params_process_id][
                    'process_metadata']['args']
                args.append(str(temp_param))
                args.append(str(temp_value))

        # Reverse lookup from this file's segment_def_id to the
        # (ifo, name, version) key.
        temp_id_to_flag_version = {}

        for i in flag_versions_numbered.keys():
            definer = flag_versions_numbered[i]
            ifo = definer['ifos']
            name = definer['name']
            version = definer['version']
            key = (ifo, name, version)
            if key not in flag_versions:
                if new_comments:
                    flag_versions[key] = InsertFlagVersion(ifo, name, version)
                    # old segment_definer comment = new flag_description
                    flag_versions[key].flag_description = str(
                        definer['comment'])
                else:
                    flag_versions[key] = InsertFlagVersionOld(
                        ifo, name, version)
                    flag_versions[key].flag_comment = str(definer['comment'])
                    flag_versions[key].version_comment = str(
                        definer['comment'])
            flag_versions[key].temporary_definer_id = definer[
                'segment_def_id']
            flag_versions[key].temporary_process_id = definer['process_id']
            # Populate reverse lookup dictionary:
            temp_id_to_flag_version[
                flag_versions[key].temporary_definer_id] = key

        # parse segment_summary table and associate known segments with
        # flag_versions above:
        ## Note this reset is needed for looping over multiple files
        for i in flag_versions.keys():
            flag_versions[i].temp_process_ids = {}
        summary_table = segment_md.table['segment_summary']
        for row in summary_table['stream']:
            columns = summary_table['orderedcol']
            seg_def_index = columns.index('segment_def_id')
            (ifo, name, version) = temp_id_to_flag_version[row[seg_def_index]]
            start_time = row[columns.index('start_time')] + offset
            end_time = row[columns.index('end_time')] + offset
            seg_sum_comment = row[columns.index('comment')]
            flag_version = flag_versions[(ifo, name, version)]
            new_seg_summary = segments.segmentlist(
                [segments.segment(start_time, end_time)])
            flag_version.appendKnown(new_seg_summary)
            # Track, per process, the span of data it affected for this flag
            # version: the minimum start and maximum stop over its summary
            # rows.  This is later combined with the matching
            # process_metadata dictionary.
            temp_process_id = row[columns.index('process_id')]
            if temp_process_id in flag_version.temp_process_ids:
                # Process already seen: only widen the affected span.
                affected = flag_version.temp_process_ids[temp_process_id]
                if start_time < affected['insert_data_start']:
                    affected['insert_data_start'] = start_time
                if end_time > affected['insert_data_stop']:
                    affected['insert_data_stop'] = end_time
            else:
                # Need to make the dictionary entry for this process_id
                if seg_sum_comment is not None:
                    flag_version.provenance_url = seg_sum_comment
                else:
                    flag_version.provenance_url = ''
                flag_version.temp_process_ids[temp_process_id] = {}
                flag_version.temp_process_ids[temp_process_id][
                    'insert_data_start'] = start_time
                flag_version.temp_process_ids[temp_process_id][
                    'insert_data_stop'] = end_time

        # Now append an insert_history element to each flag version, pairing
        # the insertion_metadata gathered above with the process_metadata
        # from process_dict.
        if debug:
            t1 = time.time()
        for i in flag_versions.keys():
            for pid in flag_versions[i].temp_process_ids.keys():
                start = flag_versions[i].temp_process_ids[pid][
                    'insert_data_start']
                stop = flag_versions[i].temp_process_ids[pid][
                    'insert_data_stop']
                if new_comments:
                    flag_versions[i].flag_version_comment = process_dict[pid][
                        'process_comment']
                insert_history_dict = {}
                insert_history_dict['process_metadata'] = process_dict[pid][
                    'process_metadata']
                insertion_metadata = {}
                insert_history_dict['insertion_metadata'] = insertion_metadata
                insertion_metadata['insert_data_stop'] = stop
                insertion_metadata['insert_data_start'] = start
                ifo = flag_versions[i].ifo
                version = flag_versions[i].version
                name = flag_versions[i].name
                # FIX make dq a constant string in case we ever change it
                flag_path = '/dq/' + '/'.join(
                    [str(ifo), str(name), str(version)])
                if use_new_insertion_metadata:
                    insertion_metadata['comment'] = flag_path
                else:
                    insertion_metadata['uri'] = flag_path
                insertion_metadata['timestamp'] = _UTCToGPS(time.gmtime())
                insertion_metadata['auth_user'] = process.get_username()
                flag_versions[i].insert_history.append(insert_history_dict)

        # parse segment table and associate active segments with
        # flag_versions above:
        # NOTE(review): the KeyError handler below is meant for a missing
        # segment table, but it also catches a KeyError raised anywhere
        # inside the loop (e.g. an unknown segment_def_id) -- confirm that
        # is acceptable.
        try:
            segment_table = segment_md.table['segment']
            for row in segment_table['stream']:
                columns = segment_table['orderedcol']
                seg_def_index = columns.index('segment_def_id')
                (ifo, name,
                 version) = temp_id_to_flag_version[row[seg_def_index]]
                start_time = row[columns.index('start_time')] + offset
                end_time = row[columns.index('end_time')] + offset
                new_seg = segments.segmentlist(
                    [segments.segment(start_time, end_time)])
                flag_versions[(ifo, name, version)].appendActive(new_seg)
        except KeyError:
            logger.info("No segment table for this file: %s" % filename)
            if debug:
                print("No segment table for this file: %s" % filename)
        except:
            # Report and re-raise; nothing is swallowed here.
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            raise

    for i in flag_versions.keys():
        flag_versions[i].coalesceInsertHistory()

    if threads > 1:
        # Call this after the loop over files, and we should be good to go.
        # Never start more workers than there are flag versions to send.
        concurrent = min(threads, len(flag_versions))
        # Fix!!! Improvement: remove hardcoded concurrency
        q = Queue(concurrent * 2)
        for _ in range(concurrent):
            t = Thread(target=threadedPatchWithFailCases,
                       args=[q, server, debug, logger])
            t.daemon = True
            t.start()
        for flag_version in flag_versions.values():
            flag_version.buildFlagDictFromInsertVersion()
            url = flag_version.buildURL(server)
            if debug:
                print(url)
                logger.debug("json.dumps(i.flagDict):")
                logger.debug("%s" % json.dumps(flag_version.flagDict))
            q.put(flag_version)
        # Block until the workers have marked every queued item as done.
        q.join()
    else:
        for flag_version in flag_versions.values():
            flag_version.buildFlagDictFromInsertVersion()
            url = flag_version.buildURL(server)
            if debug:
                logger.debug("Url for the following data: %s" % url)
                logger.debug("json.dumps(i.flagDict):")
                logger.debug("%s" % json.dumps(flag_version.flagDict))
            patchWithFailCases(flag_version, url, debug, logger,
                               testing_options)

    if debug:
        logger.debug(
            "If we made it this far, no errors were encountered in the inserts."
        )
    ### Fix!!! Improvement: Should be more careful about error handling here.
    if debug:
        # t1 was taken while processing the last file in the list.
        t2 = time.time()
        logger.debug("Time elapsed for file %s = %d." % (filename, t2 - t1))
    return True
Esempio n. 6
0
def InsertMultipleDQXMLFileThreaded(filenames,logger,server='http://slwebtest.virgo.infn.it',hackDec11=True,debug=True,threads=1,testing_options={}):
    """
    Insert the contents of several DQXML files into the DQSEGDB.

    Every file is parsed in turn; its segment_definer, process,
    process_params, segment_summary and segment tables are folded into
    one flag-version object per (ifo, name, version) key.  Once all
    files have been read, each flag version is patched to the server,
    either serially or through a pool of worker threads.

    - filenames is a list of string filenames for DQXML files.
    - logger is the logger object that receives info/debug messages.
    - server is the base URL of the segment database server.
    - hackDec11 selects the key used for the process start time in the
      process metadata: 'process_start_time' when true,
      'process_start_timestamp' (newer server APIs) when false.
    - debug turns on extra logging, printing and timing output.
    - threads: when greater than 1 the patches are sent by worker
      threads running threadedPatchWithFailCases; otherwise they are
      sent one by one with patchWithFailCases.
    - testing_options is a dictionary including (optionally):
      offset (int, added to every segment start and end time) and
      synchronize (time in 'HH:MM' format (string)).  synchronize is
      not read here; the dictionary is handed on unchanged to
      patchWithFailCases in the non-threaded path.  The default is a
      shared mutable dict, kept for interface compatibility; this
      function itself only reads it.

    Returns True if it completes successfully.
    Raises ValueError if filenames is empty.
    """
    logger.info("Beginning call to InsertMultipleDQXMLFileThreaded.  This message last updated April 14 2015, Ciao da Italia!")
    from threading import Thread
    try:
        from Queue import Queue  # Python 2 module name
    except ImportError:
        from queue import Queue  # Python 3 module name
    import sys

    # Make a call to server+'/dq':
    protocol = server.split(':')[0]
    serverfqdn = server.split('/')[-1]
    apiResult = queryAPIVersion(protocol, serverfqdn, False)
    # If the API change results in a backwards incompatibility, handle it
    # here with a flag that affects behavior below: servers at 2.1.0 or
    # later want S6 style comments, older servers must not get them.
    # NOTE(review): if apiResult is a plain string this comparison is
    # lexicographic (e.g. "10.0.0" sorts before "2.1.0") -- confirm what
    # queryAPIVersion returns before relying on it for versions >= 10.
    new_comments = bool(apiResult >= "2.1.0")

    offset = int(testing_options.get('offset', 0))

    xmlparser = pyRXP.Parser()
    lwtparser = ldbd.LIGOLwParser()

    # Accumulates across all files, keyed by (ifo, name, version).
    flag_versions = {}

    if len(filenames) < 1:
        print("Empty file list sent to InsertMultipleDQXMLFileThreaded")
        raise ValueError("Empty file list sent to InsertMultipleDQXMLFileThreaded")

    # Everything inside this loop is specific to a given file.
    for filename in filenames:

        segment_md = setupSegment_md(filename, xmlparser, lwtparser, debug)

        # One dict per segment_definer row, mapping column name -> value.
        definer_table = segment_md.table['segment_definer']
        flag_versions_numbered = {}
        for j, row in enumerate(definer_table['stream']):
            flag_versions_numbered[j] = {}
            for i, entry in enumerate(definer_table['orderedcol']):
                flag_versions_numbered[j][entry] = row[i]

        # Parse the process table and make a dict that corresponds with
        # each process, where the keys are like "process:process_id:1" so
        # that they can be matched to the flag versions from the segment
        # definer below.
        #
        # Note:  Wherever temp_ precedes a name, it is generally an
        # identifier field from the dqxml that is only good for the single
        # dqxml file being parsed.
        process_table = segment_md.table['process']
        process_dict = {}
        for row in process_table['stream']:
            process_id_index = process_table['orderedcol'].index('process_id')
            temp_process_id = row[process_id_index]
            proc_entry = {}
            process_dict[temp_process_id] = proc_entry
            # Copy every column of the row, e.g. program, version,
            # cvs_repository, cvs_entry_time, comment, node, username,
            # unix_procid, start_time, end_time, process_id, ifos.
            for i, entry in enumerate(process_table['orderedcol']):
                proc_entry[entry] = row[i]
            # Map those columns onto the process_metadata layout.
            metadata = {}
            proc_entry['process_metadata'] = metadata
            if hackDec11:
                metadata['process_start_time'] = proc_entry['start_time']
            else: # This is for the newer server APIs
                metadata['process_start_timestamp'] = proc_entry['start_time']
            if new_comments:
                proc_entry['process_comment'] = proc_entry['comment']
            metadata['uid'] = proc_entry['username']
            metadata['args'] = [] # filled from the process_params table below
            metadata['pid'] = proc_entry['unix_procid']
            metadata['name'] = proc_entry['program']
            metadata['fqdn'] = proc_entry['node'] ### Fix!!! Improvement: not really fqdn, just the node name

        # The args live in the separate process_params table, which may be
        # absent from the file.
        try:
            params_table = segment_md.table['process_params']
            len(params_table['stream'])
        except Exception:
            logger.info("No process_params table for file: %s" % filename)
        else:
            for row in params_table['stream']:
                process_id_index = params_table['orderedcol'].index('process_id')
                temp_process_params_process_id = row[process_id_index]
                # Pull only the param and value columns off each row and
                # append them, in that order, to the owning process' args.
                for i, entry in enumerate(params_table['orderedcol']):
                    if entry == "param":
                        temp_param = str(row[i])
                    if entry == "value":
                        temp_value = str(row[i])
                args = process_dict[temp_process_params_process_id]['process_metadata']['args']
                args.append(temp_param)
                args.append(temp_value)

        # Create (or reuse, when an earlier file already defined it) the
        # flag version object for every segment_definer row, and build the
        # reverse lookup from this file's segment_def_id to the key.
        temp_id_to_flag_version = {}
        for definer in flag_versions_numbered.values():
            ifo = definer['ifos']
            name = definer['name']
            version = definer['version']
            key = (ifo, name, version)
            if key not in flag_versions:
                if new_comments:
                    flag_versions[key] = InsertFlagVersion(ifo, name, version)
                    # old segment_definer comment = new flag_description
                    flag_versions[key].flag_description = str(definer['comment'])
                else:
                    flag_versions[key] = InsertFlagVersionOld(ifo, name, version)
                    flag_versions[key].flag_comment = str(definer['comment'])
                    flag_versions[key].version_comment = str(definer['comment'])
            fv = flag_versions[key]
            fv.temporary_definer_id = definer['segment_def_id']
            fv.temporary_process_id = definer['process_id']
            # Populate reverse lookup dictionary:
            temp_id_to_flag_version[fv.temporary_definer_id] = key

        # Parse the segment_summary table and associate known segments with
        # the flag versions above.  The per-process bookkeeping is reset
        # first; this is needed for looping over multiple files.
        for fv in flag_versions.values():
            fv.temp_process_ids = {}
        summary_table = segment_md.table['segment_summary']
        for row in summary_table['stream']:
            columns = summary_table['orderedcol']
            seg_def_index = columns.index('segment_def_id')
            (ifo, name, version) = temp_id_to_flag_version[row[seg_def_index]]
            start_time_index = columns.index('start_time')
            end_time_index = columns.index('end_time')
            start_time = row[start_time_index] + offset
            end_time = row[end_time_index] + offset
            comment_index = columns.index('comment')
            seg_sum_comment = row[comment_index]
            fv = flag_versions[(ifo, name, version)]
            fv.appendKnown(segments.segmentlist([segments.segment(start_time, end_time)]))
            # Track, per process, the overall span of data it affected:
            # temp_process_ids maps the process id to a dict holding
            # insert_data_start / insert_data_stop, which is combined with
            # the matching process_metadata further down.
            process_id_index = columns.index('process_id')
            temp_process_id = row[process_id_index]
            if temp_process_id in fv.temp_process_ids:
                # Process already seen for this flag version: only widen
                # the affected span if this summary extends it.
                span = fv.temp_process_ids[temp_process_id]
                if start_time < span['insert_data_start']:
                    span['insert_data_start'] = start_time
                if end_time > span['insert_data_stop']:
                    span['insert_data_stop'] = end_time
            else:
                # First summary for this process: record the provenance
                # and start a new span.
                if seg_sum_comment is not None:
                    fv.provenance_url = seg_sum_comment
                else:
                    fv.provenance_url = ''
                fv.temp_process_ids[temp_process_id] = {}
                fv.temp_process_ids[temp_process_id]['insert_data_start'] = start_time
                fv.temp_process_ids[temp_process_id]['insert_data_stop'] = end_time

        # Append one insert_history element per (flag version, process),
        # pairing the insertion_metadata span with the process_metadata
        # collected earlier.
        if debug:
            # Restarted for every file, so the elapsed time logged at the
            # end is measured from this point in the last file.
            t1 = time.time()
        for fv in flag_versions.values():
            for pid in fv.temp_process_ids.keys():
                start = fv.temp_process_ids[pid]['insert_data_start']
                stop = fv.temp_process_ids[pid]['insert_data_stop']
                if new_comments:
                    fv.flag_version_comment = process_dict[pid]['process_comment']
                insert_history_dict = {}
                insert_history_dict['process_metadata'] = process_dict[pid]['process_metadata']
                insert_history_dict['insertion_metadata'] = {}
                insert_history_dict['insertion_metadata']['insert_data_stop'] = stop
                insert_history_dict['insertion_metadata']['insert_data_start'] = start
                ifo = fv.ifo
                version = fv.version
                name = fv.name
                insert_history_dict['insertion_metadata']['uri'] = '/dq/'+'/'.join([str(ifo),str(name),str(version)])  # FIX make dq a constant string in case we ever change it
                insert_history_dict['insertion_metadata']['timestamp'] = _UTCToGPS(time.gmtime())
                insert_history_dict['insertion_metadata']['auth_user'] = process.get_username()
                fv.insert_history.append(insert_history_dict)

        # Parse the segment table and associate active segments with the
        # flag versions above.
        # NOTE(review): the KeyError handler also catches a KeyError raised
        # by the temp_id_to_flag_version lookup (a segment_def_id missing
        # from this file's segment_definer) and reports it as a missing
        # segment table.
        try:
            segment_table = segment_md.table['segment']
            for row in segment_table['stream']:
                columns = segment_table['orderedcol']
                seg_def_index = columns.index('segment_def_id')
                (ifo, name, version) = temp_id_to_flag_version[row[seg_def_index]]
                start_time_index = columns.index('start_time')
                end_time_index = columns.index('end_time')
                start_time = row[start_time_index] + offset
                end_time = row[end_time_index] + offset
                new_seg = segments.segmentlist([segments.segment(start_time, end_time)])
                flag_versions[(ifo, name, version)].appendActive(new_seg)
        except KeyError:
            logger.info("No segment table for this file: %s" % filename)
            if debug:
                print("No segment table for this file: %s" % filename)
        except:
            # Report and re-raise; nothing is swallowed here.
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            raise

    for fv in flag_versions.values():
        fv.coalesceInsertHistory()

    if threads > 1:
        # Never start more workers than there are flag versions to send.
        # (This used to be len() of a leftover loop variable, which capped
        # the pool at the length of a key tuple instead.)
        concurrent = min(threads, len(flag_versions))
        q = Queue(concurrent*2) # Fix!!! Improvement: remove hardcoded concurrency
        for _ in range(concurrent):
            t = Thread(target=threadedPatchWithFailCases, args=[q,server,debug,logger])
            # Daemon threads so idle workers do not keep the process alive.
            t.daemon = True
            t.start()
        for fv in flag_versions.values():
            fv.buildFlagDictFromInsertVersion()
            url = fv.buildURL(server)
            if debug:
                print(url)
                logger.debug("json.dumps(i.flagDict):")
                logger.debug("%s"%json.dumps(fv.flagDict))
            q.put(fv)
        # Block until the workers have marked every queued item as done.
        q.join()
    else:
        for fv in flag_versions.values():
            fv.buildFlagDictFromInsertVersion()
            url = fv.buildURL(server)
            if debug:
                logger.debug("Url for the following data: %s" % url)
                logger.debug("json.dumps(i.flagDict):")
                logger.debug("%s"%json.dumps(fv.flagDict))
            patchWithFailCases(fv, url, debug, logger, testing_options)

    ### Fix!!! Improvement: Should be more careful about error handling here.
    if debug:
        logger.debug("If we made it this far, no errors were encountered in the inserts.")
        t2 = time.time()
        # filename and t1 both refer to the last file processed.
        logger.debug("Time elapsed for file %s = %d." % (filename,t2-t1))
    return True
Esempio n. 7
0
def set_process_end_time(process):
    """
    Stamp a row of a process table with the current time as its end time.

    The current UTC time is passed through _UTCToGPS() and stored in
    process.end_time.  The same row object is returned.
    """
    now_utc = time.gmtime()
    process.end_time = _UTCToGPS(now_utc)
    return process