Example #1
def make_veto_table(workflow, out_dir, vetodef_file=None, tags=None):
    """ Creates a node in the workflow for writing the veto_definer
    table. Returns a File instance for the output file.
    """
    if vetodef_file is None:
        if not workflow.cp.has_option_tags("workflow-segments",
                                           "segments-veto-definer-file", []):
            return None
        vetodef_file = workflow.cp.get_opt_tags("workflow-segments",
                                                "segments-veto-definer-file",
                                                [])
        file_url = urljoin('file:', pathname2url(vetodef_file))
        vdf_file = File(workflow.ifos,
                        'VETO_DEFINER',
                        workflow.analysis_time,
                        file_url=file_url)
        vdf_file.add_pfn(file_url, site='local')
    else:
        vdf_file = vetodef_file

    if tags is None:
        tags = []
    makedir(out_dir)
    node = PlotExecutable(workflow.cp,
                          'page_vetotable',
                          ifos=workflow.ifos,
                          out_dir=out_dir,
                          tags=tags).create_node()
    node.add_input_opt('--veto-definer-file', vdf_file)
    node.new_output_file_opt(workflow.analysis_time, '.html', '--output-file')
    workflow += node
    return node.output_files[0]
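
A minimal usage sketch for make_veto_table, assuming an initialized pycbc
Workflow whose [workflow-segments] section provides the
segments-veto-definer-file option; the workflow object, output directory
and tag below are hypothetical.

# Hypothetical: 'workflow' is a configured pycbc.workflow Workflow instance.
veto_table = make_veto_table(workflow, 'plots/veto_table',
                             tags=['PRE_ANALYSIS'])
if veto_table is not None:
    # The returned File points at the generated HTML veto table.
    print(veto_table.storage_path)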
Example #2
def get_coh_PTF_files(cp, ifos, run_dir, bank_veto=False, summary_files=False):
    """
    Retrieve files needed to run coh_PTF jobs within a PyGRB workflow

    Parameters
    ----------
    cp : pycbc.workflow.configuration.WorkflowConfigParser object
        The parsed configuration options of a pycbc.workflow.core.Workflow.
    ifos : str
        String containing the analysis interferometer IDs.
    run_dir : str
        The run directory, destination for retrieved files.
    bank_veto : Boolean
        If True, retrieve the bank_veto_bank.xml file.
    summary_files : Boolean
        If True, retrieve the summary page style files.

    Returns
    -------
    file_list : pycbc.workflow.FileList object
        A FileList containing the retrieved files.
    """
    if os.getenv("LAL_SRC") is None:
        raise ValueError("The environment variable LAL_SRC must be set to a "
                         "location containing the file lalsuite.git")
    lalDir = os.getenv("LAL_SRC")
    sci_seg = segments.segment(int(cp.get("workflow", "start-time")),
                               int(cp.get("workflow", "end-time")))
    file_list = FileList([])

    # Bank veto
    if bank_veto:
        shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/"
                    "bank_veto_bank.xml" % lalDir, run_dir)
        bank_veto_url = "file://localhost%s/bank_veto_bank.xml" % run_dir
        bank_veto_file = File(ifos,
                              "bank_veto_bank",
                              sci_seg,
                              file_url=bank_veto_url)
        # FIXME: Is this an input file? If so use the from_path classmethod
        bank_veto_file.add_pfn(bank_veto_file.cache_entry.path, site="local")
        file_list.append(bank_veto_file)

    if summary_files:
        # summary.js file
        shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/"
                    "coh_PTF_html_summary.js" % lalDir, run_dir)
        summary_js_url = "file://localhost%s/coh_PTF_html_summary.js" \
                         % run_dir
        summary_js = File(ifos,
                          "coh_PTF_html_summary_js",
                          sci_seg,
                          file_url=summary_js_url)
        summary_js.add_pfn(summary_js.cache_entry.path, site="local")
        file_list.append(summary_js)

        # summary.css file
        shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/"
                    "coh_PTF_html_summary.css" % lalDir, run_dir)
        summary_css_url = "file://localhost%s/coh_PTF_html_summary.css" \
                          % run_dir
        summary_css = File(ifos,
                           "coh_PTF_html_summary_css",
                           sci_seg,
                           file_url=summary_css_url)
        summary_css.add_pfn(summary_css.cache_entry.path, site="local")
        file_list.append(summary_css)

    return file_list
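
A hedged usage sketch for get_coh_PTF_files; the configuration object,
interferometer string, run directory and LAL_SRC location are all
illustrative assumptions.

# Hypothetical: 'cp' is a parsed WorkflowConfigParser whose [workflow]
# section defines start-time and end-time.
os.environ.setdefault("LAL_SRC", "/path/to/lalsuite")  # illustrative path
ptf_files = get_coh_PTF_files(cp, "H1L1", "output/grb_run",
                              bank_veto=True, summary_files=True)
for ptf_file in ptf_files:
    print(ptf_file.cache_entry.path)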
Example #3
def make_exttrig_file(cp, ifos, sci_seg, out_dir):
    '''
    Make an ExtTrig xml file containing information on the external trigger

    Parameters
    ----------
    cp : pycbc.workflow.configuration.WorkflowConfigParser object
        The parsed configuration options of a pycbc.workflow.core.Workflow.
    ifos : str
        String containing the analysis interferometer IDs.
    sci_seg : ligo.segments.segment
        The science segment for the analysis run.
    out_dir : str
        The output directory, destination for xml file.

    Returns
    -------
    xml_file : pycbc.workflow.File object
        The xml file with external trigger information.
    '''
    # Initialise objects
    xmldoc = ligolw.Document()
    xmldoc.appendChild(ligolw.LIGO_LW())
    tbl = lsctables.New(lsctables.ExtTriggersTable)
    cols = tbl.validcolumns
    xmldoc.childNodes[-1].appendChild(tbl)
    row = tbl.appendRow()

    # Add known attributes for this GRB
    row.event_ra = float(cp.get("workflow", "ra"))
    row.event_dec = float(cp.get("workflow", "dec"))
    row.start_time = int(cp.get("workflow", "trigger-time"))
    row.event_number_grb = cp.get("workflow", "trigger-name")

    # Fill in all remaining columns of the row with default values
    for entry in cols:
        if not hasattr(row, entry):
            if cols[entry] in ['real_4', 'real_8']:
                setattr(row, entry, 0.)
            elif cols[entry] == 'int_4s':
                setattr(row, entry, 0)
            elif cols[entry] == 'lstring':
                setattr(row, entry, '')
            elif entry == 'process_id':
                row.process_id = ilwd.ilwdchar("external_trigger:process_id:0")
            elif entry == 'event_id':
                row.event_id = ilwd.ilwdchar("external_trigger:event_id:0")
            else:
                raise ValueError("Column %s not recognized" % entry)

    # Save file
    xml_file_name = "triggerGRB%s.xml" % str(cp.get("workflow",
                                                    "trigger-name"))
    xml_file_path = os.path.join(out_dir, xml_file_name)
    utils.write_filename(xmldoc, xml_file_path)
    xml_file_url = urljoin("file:", pathname2url(xml_file_path))
    xml_file = File(ifos, xml_file_name, sci_seg, file_url=xml_file_url)
    xml_file.add_pfn(xml_file_url, site="local")

    return xml_file
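
A hedged usage sketch for make_exttrig_file; the interferometer string,
GPS times and output directory are illustrative assumptions.

# Hypothetical: 'cp' carries a [workflow] section with ra, dec,
# trigger-time and trigger-name options for the GRB being analysed.
grb_seg = segments.segment(1126250000, 1126260000)  # illustrative GPS times
grb_xml = make_exttrig_file(cp, "H1L1V1", grb_seg, "output/exttrig")
print(grb_xml.cache_entry.path)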
Example #4
def run_datafind_instance(cp, outputDir, connection, observatory, frameType,
                          startTime, endTime, ifo, tags=None):
    """
    This function will query the datafind server once to find frames between
    the specified times for the specified frame type and observatory.

    Parameters
    ----------
    cp : ConfigParser instance
        Source for any kwargs that should be sent to the datafind module
    outputDir : string
        Output cache files will be written here. We also write the
        commands for reproducing what is done in this function to this
        directory.
    connection : datafind connection object
        Initialized through the `gwdatafind` module, this is the open
        connection to the datafind server.
    observatory : string
        The observatory to query frames for. Ex. 'H', 'L' or 'V'.  NB: not
        'H1', 'L1', 'V1' which denote interferometers.
    frameType : string
        The frame type to query for.
    startTime : int
        Integer start time to query the datafind server for frames.
    endTime : int
        Integer end time to query the datafind server for frames.
    ifo : string
        The interferometer to use for naming output. Ex. 'H1', 'L1', 'V1'.
        Maybe this could be merged with the observatory string, but this
        could cause issues if running on old 'H2' and 'H1' data.
    tags : list of string, optional (default=None)
        Use this to specify tags. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than [workflow-datafind]).
        This is also used to tag the Files returned by the class to
        uniquify the Files and the actual filenames.
        FIXME: Filenames may not be unique with current codes!

    Returns
    -------
    dfCache : glue.lal.Cache instance
        The glue.lal.Cache representation of the call to the datafind
        server and the returned frame files.
    cacheFile : pycbc.workflow.core.File
        Cache file listing all of the datafind output files for use
        later in the pipeline.

    """
    if tags is None:
        tags = []

    seg = segments.segment([startTime, endTime])
    # Take the datafind kwargs from config (usually urltype=file is
    # given).
    dfKwargs = {}
    # By default ignore missing frames, this case is dealt with outside of here
    dfKwargs['on_gaps'] = 'ignore'
    if cp.has_section("datafind"):
        for item, value in cp.items("datafind"):
            dfKwargs[item] = value
    for tag in tags:
        if cp.has_section('datafind-%s' % tag):
            for item, value in cp.items('datafind-%s' % tag):
                dfKwargs[item] = value

    # It is useful to print the corresponding command to the logs
    # directory to check if this was expected.
    log_datafind_command(observatory, frameType, startTime, endTime,
                         os.path.join(outputDir,'logs'), **dfKwargs)
    logging.debug("Asking datafind server for frames.")
    dfCache = lal.Cache.from_urls(
        connection.find_frame_urls(observatory, frameType,
                                   startTime, endTime, **dfKwargs),
    )
    logging.debug("Frames returned")
    # workflow format output file
    cache_file = File(ifo, 'DATAFIND', seg, extension='lcf',
                      directory=outputDir, tags=tags)
    cache_file.add_pfn(cache_file.cache_entry.path, site='local')

    dfCache.ifo = ifo
    # Dump output to file
    # FIXME: CANNOT use dfCache.tofile because it will print 815901601.00000
    #        as a gps time which is incompatible with the lal cache format
    #        (and the C codes) which demand an integer.
    with open(cache_file.storage_path, "w") as f_out:
        for entry in dfCache:
            start = str(int(entry.segment[0]))
            duration = str(int(abs(entry.segment)))
            print("%s %s %s %s %s"
                  % (entry.observatory, entry.description, start, duration,
                     entry.url), file=f_out)
            entry.segment = segments.segment(int(entry.segment[0]),
                                             int(entry.segment[1]))
    return dfCache, cache_file
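
A hedged usage sketch for run_datafind_instance; the frame type, GPS times
and tag are illustrative, and 'connection' stands for an already-open
gwdatafind connection.

# Hypothetical: 'cp' may carry a [datafind] section (e.g. urltype=file)
# and 'connection' was initialized through the gwdatafind module.
df_cache, cache_file = run_datafind_instance(
    cp, 'datafind_output', connection, 'H', 'H1_HOFT_C00',
    1126250000, 1126260000, 'H1', tags=['FULL_DATA'])
print("Datafind returned %d frame files" % len(df_cache))
print("Cache written to %s" % cache_file.storage_path)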
Example #5
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.

    Parameters
    -----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.

    Returns
    --------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    prev_file = None
    prev_name = None
    this_name = None

    datafind_filelist = FileList([])

    for cache in datafindcache_list:
        # sort the cache into time sequential order
        cache.sort()
        curr_ifo = cache.ifo
        for frame in cache:
            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost', 'file://')

            # Create one File() object for each unique frame file that we
            # get back in the cache.
            if prev_file:
                prev_name = os.path.basename(prev_file.cache_entry.url)
                this_name = os.path.basename(frame.url)

            if (prev_file is None) or (prev_name != this_name):
                currFile = File(curr_ifo, frame.description,
                    frame.segment, file_url=frame.url, use_tmp_subdirs=True)
                datafind_filelist.append(currFile)
                prev_file = currFile

            # Populate the PFNs for the File() we just created
            if frame.url.startswith('file://'):
                currFile.add_pfn(frame.url, site='local')
                if frame.url.startswith(
                    'file:///cvmfs/oasis.opensciencegrid.org/ligo/frames'):
                    # Datafind returned a URL valid on the osg as well
                    # so add the additional PFNs to allow OSG access.
                    currFile.add_pfn(frame.url, site='osg')
                    currFile.add_pfn(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'root://xrootd-local.unl.edu/user/'), site='osg')
                    currFile.add_pfn(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://red-gridftp.unl.edu/user/'), site='osg')
                    currFile.add_pfn(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://ldas-grid.ligo.caltech.edu/hdfs/'), site='osg')
                elif frame.url.startswith(
                    'file:///cvmfs/gwosc.osgstorage.org/'):
                    # Datafind returned a URL valid on the osg as well
                    # so add the additional PFNs to allow OSG access.
                    for s in ['osg', 'orangegrid', 'osgconnect']:
                        currFile.add_pfn(frame.url, site=s)
                        currFile.add_pfn(frame.url, site="{}-scratch".format(s))
            else:
                currFile.add_pfn(frame.url, site='notlocal')

    return datafind_filelist
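
A hedged sketch chaining the two datafind helpers above: the cache returned
by run_datafind_instance feeds straight into convert_cachelist_to_filelist;
all names, frame types and times are illustrative assumptions.

# Hypothetical: same 'cp' and 'connection' as in the previous sketch.
df_cache, _ = run_datafind_instance(cp, 'datafind_output', connection,
                                    'H', 'H1_HOFT_C00',
                                    1126250000, 1126260000, 'H1')
frame_files = convert_cachelist_to_filelist([df_cache])
for frame_file in frame_files:
    print(frame_file.cache_entry.path)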