Example #1
def check_download_attempts():
    """For each download attempt with status 'downloading' check
        to see that its thread is still active. If not, mark it
        as 'unknown', and mark the file as 'unverified'.
    """
    attempts = jobtracker.query("SELECT * FROM download_attempts " \
                                "WHERE status='downloading'")

    active_ids = [int(t.getName()) for t in threading.enumerate() \
                            if isinstance(t, DownloadThread)]

    for attempt in attempts:
        if attempt['id'] not in active_ids:
            dlm_cout.outs("Download attempt (ID: %d) is no longer running." % \
                            attempt['id'])
            queries = []
            queries.append("UPDATE files " \
                           "SET status='unverified', " \
                                "updated_at='%s', " \
                                "details='Download thread is no longer running' "
                           "WHERE id=%d" % (jobtracker.nowstr(), attempt['download_id']))
            queries.append("UPDATE download_attempts " \
                           "SET status='unknown', " \
                                "updated_at='%s', " \
                                "details='Download thread is no longer running' "
                           "WHERE id=%d" % (jobtracker.nowstr(), attempt['id']))
            jobtracker.query(queries)
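
A note on Example #1: matching attempts to threads by int(t.getName())
only works if each DownloadThread names itself after its attempt id. A
minimal sketch of that assumed convention (hypothetical, not code from
the source project):

import threading

class DownloadThread(threading.Thread):
    """Hypothetical sketch: carry one download attempt and set the
        thread name to the attempt id so check_download_attempts()
        can match threads to download_attempts rows."""
    def __init__(self, attempt):
        threading.Thread.__init__(self, name=str(attempt['id']))
        self.attempt = attempt

    def run(self):
        download(self.attempt)  # module-level download() shown in Example #4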
Example #2
def status():
    """Print downloader's status to screen.
    """
    used = get_space_used()
    avail = get_space_available()
    allowed = config.download.space_to_use
    print "Space used by downloaded files: %.2f GB of %.2f GB (%.2f%%)" % \
            (used/1024.0**3, allowed/1024.0**3, 100.0*used/allowed)
    print "Space available on file system: %.2f GB" % (avail/1024.0**3)

    numwait = jobtracker.query("SELECT COUNT(*) FROM requests " \
                               "WHERE status='waiting'", \
                               fetchone=True)[0]
    numfail = jobtracker.query("SELECT COUNT(*) FROM requests " \
                               "WHERE status='failed'", \
                               fetchone=True)[0]
    print "Number of requests waiting: %d" % numwait
    print "Number of failed requests: %d" % numfail

    numdlactive = jobtracker.query("SELECT COUNT(*) FROM files " \
                                   "WHERE status='downloading'", \
                                   fetchone=True)[0]
    numdlfail = jobtracker.query("SELECT COUNT(*) FROM files " \
                                 "WHERE status='failed'", \
                                 fetchone=True)[0]
    print "Number of active downloads: %d" % numdlactive
    print "Number of failed downloads: %d" % numdlfail
Example #3
def make_request():
    """Make a request for data to be restored by connecting to the
        web services at Cornell.
    """
    dlm_cout.outs("Requesting data")
    num_beams = 1
    web_service = CornellWebservice.Client()
    guid = web_service.Restore(username=config.download.api_username, \
                               pw=config.download.api_password, \
                               number=num_beams, \
                               bits=config.download.request_numbits, \
                               fileType=config.download.request_datatype)
    if guid == "fail":
        raise pipeline_utils.PipelineError("Request for restore returned 'fail'.")

    requests = jobtracker.query("SELECT * FROM requests " \
                             "WHERE guid='%s'" % guid)
    if requests:
        # Entries in the requests table exist with this GUID!?
        raise pipeline_utils.PipelineError("There are %d requests in the " \
                                           "job-tracker DB with this GUID %s" % \
                                           (len(requests), guid))

    jobtracker.query("INSERT INTO requests ( " \
                        "guid, " \
                        "created_at, " \
                        "updated_at, " \
                        "status, " \
                        "details) " \
                     "VALUES ('%s', '%s', '%s', '%s', '%s')" % \
                     (guid, jobtracker.nowstr(), jobtracker.nowstr(), 'waiting', \
                        'Newly created request'))
Example #4
def download(attempt):
    """Given a row from the job-tracker's download_attempts table,
        actually attempt the download.
    """
    file = jobtracker.query("SELECT * FROM files " \
                            "WHERE id=%d" % attempt['download_id'], \
                            fetchone=True)
    request = jobtracker.query("SELECT * FROM requests " \
                               "WHERE id=%d" % file['request_id'], \
                               fetchone=True)

    queries = []
    try:
        cftp = CornellFTP.CornellFTP()
        cftp.download(os.path.join(request['guid'], file['remote_filename']))
    except Exception, e:
        queries.append("UPDATE files " \
                       "SET status='failed', " \
                            "updated_at='%s', " \
                            "details='Download failed - %s' " \
                       "WHERE id=%d" % \
                       (jobtracker.nowstr(), str(e), file['id']))
        queries.append("UPDATE download_attempts " \
                       "SET status='download_failed', " \
                            "details='Download failed - %s', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" % \
                       (str(e), jobtracker.nowstr(), attempt['id']))
        # Record the failure, then propagate the error
        jobtracker.query(queries)
        raise
Example #5
def can_request_more():
    """Returns whether Downloader can request more restores.
        This is based on took disk space allowed for downloaded
        file, disk space available on the file system, and maximum
        number of active requests allowed.

    Inputs:
        None
    Output:
        can_request: A boolean value. True if Downloader can make a request.
                        False otherwise.
    """
    # Note: Files are restored in pairs (so we multiply by 2)
    active_requests = jobtracker.query("SELECT IFNULL(SUM(numrequested), 0) " \
                                       "FROM requests " \
                                       "WHERE status='waiting'", fetchone=True)
    to_download = jobtracker.query("SELECT * FROM files " \
                                   "WHERE status NOT IN ('downloaded', " \
                                                        "'added', " \
                                                        "'deleted', " \
                                                        "'terminal_failure')")
    if active_requests is None:
        active_requests = 0
    num_to_restore = active_requests
    num_to_download = len(to_download)
    used = get_space_used()
    reserved = get_space_committed()

    can_request = ((num_to_restore+num_to_download) < config.download.numrestored) and \
            (used+reserved < config.download.space_to_use)
    return can_request
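
An illustration of the two thresholds in can_request_more(), with
made-up numbers: if config.download.numrestored = 10 and
config.download.space_to_use is 500 GB, then 3 waiting restores,
4 files still to download, 300 GB used and 150 GB committed give
(3 + 4) < 10 and 300 + 150 < 500, so the function returns True.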
Example #6
File: job.py Project: plazar/pipeline2.0
def update_jobs_status_from_queue():
    """
    Updates database entries for jobs according to their status in the queue.
    """

    # Collect all job submissions that are still marked as running
    submits = jobtracker.query("SELECT * FROM job_submits " "WHERE status='running'")
    for submit in submits:
        # Check if job is still running (according to queue manager)
        is_running = config.jobpooler.queue_manager.is_running(submit["queue_id"])
        if is_running:
            # Do nothing.
            pass
        else:
            # Check if processing had errors
            if config.jobpooler.queue_manager.had_errors(submit["queue_id"]):
                # Errors during processing...
                errormsg = config.jobpooler.queue_manager.get_errors(submit["queue_id"])

                jobpool_cout.outs(
                    "Processing of Job #%d (Submit ID: %d; Queue ID: %s) "
                    "had errors." % (submit["job_id"], submit["id"], submit["queue_id"])
                )

                # Mark job entry with status 'failed'
                # Mark job_submit entry with status 'processing_failed'
                queries = []
                arglists = []
                queries.append(
                    "UPDATE jobs "
                    "SET status='failed', "
                    "updated_at=?, "
                    "details='Errors during processing' "
                    "WHERE id=?"
                )
                arglists.append((jobtracker.nowstr(), submit["job_id"]))
                queries.append(
                    "UPDATE job_submits " "SET status='processing_failed', " "details=?, " "updated_at=? " "WHERE id=?"
                )
                arglists.append((errormsg, jobtracker.nowstr(), submit["id"]))
                jobtracker.execute(queries, arglists)
            else:
                # No errors. Woohoo!
                # Mark job and job_submit entries with status 'processed'
                queries = []
                queries.append(
                    "UPDATE jobs "
                    "SET status='processed', "
                    "updated_at='%s', "
                    "details='Processed without errors' "
                    "WHERE id=%d" % (jobtracker.nowstr(), submit["job_id"])
                )
                queries.append(
                    "UPDATE job_submits "
                    "SET status='processed', "
                    "updated_at='%s', "
                    "details='Processed without error' "
                    "WHERE id=%d" % (jobtracker.nowstr(), submit["id"])
                )
                jobtracker.query(queries)
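
Example #6 mixes two query styles: the failure branch binds values with
'?' placeholders through jobtracker.execute(), while the success branch
interpolates them with '%'. A hedged sketch of the execute() contract
assumed here (hypothetical, including the 'jobtracker.db' filename; not
taken from the source project):

import sqlite3

def execute(queries, arglists, dbfile='jobtracker.db'):
    """Hypothetical: run each query with DB-API parameter binding,
        pairing queries[i] with arglists[i], in a single transaction."""
    conn = sqlite3.connect(dbfile)
    try:
        for query, args in zip(queries, arglists):
            conn.execute(query, args)
        conn.commit()
    finally:
        conn.close()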
Example #7
def main():
    jobids = set([int(id) for id in args])
    jobids.update(options.jobids)

    for fn in options.files:
        rows = jobtracker.query("SELECT job_files.job_id FROM job_files " \
                               "LEFT JOIN files " \
                                    "ON files.id = job_files.file_id " \
                               "WHERE files.filename LIKE '%%%s' " % fn)
        for row in rows:
            jobids.add(row['job_id'])
    print "Number of jobs to kill: %d" % len(jobids)
    for jobid in jobids:
        print "Attempting to kill job with id %d" % jobid
        row = jobtracker.query("SELECT status FROM jobs " \
                                "WHERE id=%d" % jobid, \
                                fetchone=True)
        if row['status'] in ['new', 'retrying']:
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                                  "updated_at='%s', " \
                                  "details='Job was killed manually' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), jobid))
            print "Job's status has been set to 'terminal_failure'"
            pipeline_utils.clean_up(jobid)
        else:
            print "Only jobs whose status is 'waiting' or 'retrying' " \
                  "can be killed. (Current status of job %d: %s)" % \
                  (jobid, row['status'])
Example #8
def check_active_requests():
    """Check for any requests with status='waiting'. If there are
        some, check if the files are ready for download.
    """
    active_requests = jobtracker.query("SELECT * FROM requests " \
                                       "WHERE status='waiting'")
    
    web_service = CornellWebservice.Client()
    for request in active_requests:
        location = web_service.Location(guid=request['guid'], \
                                        username=config.download.api_username, \
                                        pw=config.download.api_password)
        if location == "done":
            dlm_cout.outs("Restore (%s) is done. Will create file entries." % \
                            request['guid'])
            create_file_entries(request)
        else:
            query = "SELECT (julianday('%s')-julianday(created_at))*24 " \
                        "AS deltaT_hours " \
                    "FROM requests " \
                    "WHERE guid='%s'" % \
                        (jobtracker.nowstr(), request['guid'])
            row = jobtracker.query(query, fetchone=True)
            if row['deltaT_hours'] > config.download.request_timeout:
                dlm_cout.outs("Restore (%s) is over %d hr old " \
                                "and still not ready. Marking " \
                                "it as failed." % \
                        (request['guid'], config.download.request_timeout))
                jobtracker.query("UPDATE requests " \
                                 "SET status='failed', " \
                                    "details='Request took too long (> %d hr)', " \
                                    "updated_at='%s' " \
                                 "WHERE guid='%s'" % \
                    (config.download.request_timeout, jobtracker.nowstr(), \
                            request['guid']))
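
The timeout test in Example #8 leans on SQLite's julianday() returning
fractional days, so multiplying the difference by 24 yields hours. A
standalone check of that arithmetic:

import sqlite3

conn = sqlite3.connect(':memory:')
age_hours = conn.execute(
    "SELECT (julianday('2024-01-02 00:00:00') - "
    "julianday('2024-01-01 12:00:00'))*24").fetchone()[0]
print age_hours  # -> 12.0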
Example #9
def create_parallel_folding_jobs():
    """Check job-tracker DB for processed jobs. Submit 
	successive jobs and create
        entries in the jobs table.
    """
    Jobs = check_parallel_jobs()

    file_ids = Jobs.keys()
    file_ids.sort()

    queries = []
    for file_id in file_ids:

        # retrieve file_ids
        # rows = jobtracker.query("SELECT job_id from job_files " \
        # "WHERE file_id=%d AND task LIKE 'search%'"%file_id)
        #
        #        files_ids = [str(row['file_id']) for row in rows]

        # Submit all parallel jobs
        # for istep in range(config.searching.ddplans['nuppi']):
        if 1:
            # task_name = "folding %d"%istep # TODO
            task_name = "folding"
            queries.append(
                "INSERT INTO jobs ("
                "created_at, "
                "details, "
                "status, "
                "task, "
                "updated_at) "
                "VALUES ('%s', '%s', '%s', '%s', '%s')"
                % (jobtracker.nowstr(), "Newly created job", "new", task_name, jobtracker.nowstr())
            )

            #            for file_id in files_ids:
            queries.append(
                "INSERT INTO job_files ("
                "file_id, "
                "created_at, "
                "job_id, "
                "updated_at) "
                "SELECT id, '%s', (SELECT LAST_INSERT_ID()), '%s' "
                "FROM files "
                "WHERE id=%d" % (jobtracker.nowstr(), jobtracker.nowstr(), int(file_id))
            )

        # Mark the previous task as 'done'
        for job_id in Jobs[file_id]:
            queries.append(
                "UPDATE jobs "
                "SET status='done', "
                "updated_at='%s', "
                "details='Processed without errors' "
                "WHERE id=%d" % (jobtracker.nowstr(), job_id)
            )
    jobtracker.query(queries)
Example #10
def get_num_to_request():
    """Return the number of files to request given the average
        time to download a file (including waiting time) and
        the amount of space available.

        Inputs:
            None

        Outputs:
            num_to_request: The size of the request.
    """
    #ALLOWABLE_REQUEST_SIZES = [1, 3, 5, 8, 12]
    ALLOWABLE_REQUEST_SIZES = [1, 3, 5, 8]
    avgrate = jobtracker.query("SELECT AVG(files.size/" \
                                "(TO_SECONDS(download_attempts.updated_at)*1/86400. - " \
                                "TO_SECONDS(download_attempts.created_at)*1/86400.)) " \
                               "FROM files, download_attempts " \
                               "WHERE files.id=download_attempts.file_id " \
                                    "AND download_attempts.status='downloaded'", \
                               fetchone=True)
    avgsize = jobtracker.query("SELECT AVG(size/numrequested) FROM requests " \
                               "WHERE numbits=%d AND " \
                                    "file_type='%s'" % \
                                (config.download.request_numbits, \
                                    config.download.request_datatype.lower()), \
                                fetchone=True)
    if avgrate is None or avgsize is None:
        return min(ALLOWABLE_REQUEST_SIZES)

    # Total number requested that can be downloaded per day (on average).
    max_to_request_per_day = avgrate/avgsize
    
    used = get_space_used()
    avail = get_space_available()
    reserved = get_space_committed()
    
    # Maximum number of bytes that we should request
    max_bytes = min([avail-reserved-config.download.min_free_space, \
                        config.download.space_to_use-reserved-used])
    # Maximum number to request
    max_num = max_bytes/avgsize

    ideal_num_to_request = min([max_num, max_to_request_per_day])

    if debug.DOWNLOAD:
        print "Average dl rate: %.2f bytes/day" % avgrate
        print "Average size per request unit: %d bytes" % avgsize
        print "Max can dl per day: %d" % max_to_request_per_day
        print "Max num to request: %d" % max_num
        print "Ideal to request: %d" % ideal_num_to_request

    # Return the closest allowable request size without exceeding
    # 'ideal_num_to_request'
    num_to_request = max([0]+[N for N in ALLOWABLE_REQUEST_SIZES \
                            if N <= ideal_num_to_request])

    return num_to_request
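
A worked pass through get_num_to_request() with illustrative numbers:
avgrate = 40 GB/day and avgsize = 10 GB per request unit give
max_to_request_per_day = 4; if max_bytes works out to 60 GB, then
max_num = 6 and ideal_num_to_request = min(6, 4) = 4. The largest
allowable size not exceeding 4 is then picked:

>>> ALLOWABLE_REQUEST_SIZES = [1, 3, 5, 8]
>>> ideal_num_to_request = 4
>>> max([0] + [N for N in ALLOWABLE_REQUEST_SIZES if N <= ideal_num_to_request])
3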
Example #11
def make_request(dbname='default'):
    """Make a request for data to be restored by connecting to the
        data server.
    """
    num_beams = get_num_to_request()
    if not num_beams:
        # Request size is 0
        return
    dlm_cout.outs("Requesting data\nIssuing a request of size %d" % num_beams)

    # Ask to restore num_beams
    db = database.Database(dbname)
    QUERY = "SELECT f.obs_id FROM full_processing as f LEFT JOIN  processing AS p ON f.obs_id = p.obs_id WHERE f.status='available' AND p.details is NULL LIMIT %d"%num_beams
    db.cursor.execute(QUERY)
    obs_ids = [row[0] for row in db.cursor.fetchall()]

    # Ask for an uuid
    QUERY = "SELECT  UUID();"
    db.cursor.execute(QUERY)
    guid = db.cursor.fetchone()[0]

    if not obs_ids:
        print "There are no files to be restored."
        return

    # Mark the beams for restorations
    for obs_id in obs_ids:
        QUERY = "UPDATE full_processing SET status='requested', guid='%s', updated_at=NOW() WHERE obs_id=%s"%(guid, obs_id)
        db.cursor.execute(QUERY)
    db.conn.close()

    #if guid == "fail":
    #   raise pipeline_utils.PipelineError("Request for restore returned 'fail'.")

    requests = jobtracker.query("SELECT * FROM requests WHERE guid='%s'" % guid)

    if requests:
        # Entries in the requests table exist with this GUID!?
        raise pipeline_utils.PipelineError("There are %d requests in the " \
                               "job-tracker DB with this GUID %s" % \
                               (len(requests), guid))

    jobtracker.query("INSERT INTO requests ( " \
                        "numbits, " \
                        "numrequested, " \
                        "file_type, " \
                        "guid, " \
                        "created_at, " \
                        "updated_at, " \
                        "status, " \
                        "details) " \
                     "VALUES (%d, %d, '%s', '%s', '%s', '%s', '%s', '%s')" % \
                     (config.download.request_numbits, num_beams, \
                        config.download.request_datatype, guid, \
                        jobtracker.nowstr(), jobtracker.nowstr(), 'waiting', \
                        'Newly created request'))
Example #12
def verify_files():
    """For all downloaded files with status 'unverify' verify the files.
    """
    toverify = jobtracker.query("SELECT * FROM files " \
                                "WHERE status='unverified'")

    for file in toverify:
        if os.path.exists(file['filename']):
            actualsize = os.path.getsize(file['filename'])
        else:
            actualsize = -1
        expectedsize = file['size']

        last_attempt_id = jobtracker.query("SELECT id " \
                                           "FROM download_attempts " \
                                           "WHERE download_id=%s " \
                                           "ORDER BY id DESC " % file['id'], \
                                           fetchone=True)[0]
                                                
        queries = []
        if actualsize == expectedsize:
            dlm_cout.outs("Download of %s is complete and verified." % \
                            os.path.split(file['filename'])[-1])
            # Everything checks out!
            queries.append("UPDATE files " \
                           "SET status='downloaded', " \
                                "details='Download is complete and verified', " \
                                "updated_at='%s'" \
                           "WHERE id=%d" % \
                           (jobtracker.nowstr(), file['id']))
            queries.append("UPDATE download_attempts " \
                           "SET status='downloaded', " \
                                "details='Download is complete and verified', " \
                                "updated_at='%s'" \
                           "WHERE id=%d" % \
                           (jobtracker.nowstr(), last_attempt_id))
        else:
            dlm_cout.outs("Verification of %s failed. \n" \
                            "\tActual size (%d bytes) != Expected size (%d bytes)" % \
                            (os.path.split(file['filename'])[-1], actualsize, expectedsize))
            
            # Boo... verification failed.
            queries.append("UPDATE files " \
                           "SET status='failed', " \
                                "details='Downloaded file failed verification', " \
                                "updated_at='%s'" \
                           "WHERE id=%d" % \
                           (jobtracker.nowstr(), file['id']))
            queries.append("UPDATE download_attempts " \
                           "SET status='verification_failed', " \
                                "details='Downloaded file failed verification', " \
                                "updated_at='%s'" \
                           "WHERE id=%d" % \
                           (jobtracker.nowstr(), last_attempt_id))
        jobtracker.query(queries)
Example #13
def create_parallel_search_jobs():
    """Check job-tracker DB for processed jobs. Submit 
	successive jobs and create
        entries in the jobs table.
    """
    # Look for job with rfifind done
    rows = jobtracker.query("SELECT * from jobs " "WHERE status='processed' " "AND task='rfifind'")

    queries = []
    for row in rows:

        # retrieve file_ids
        rows2 = jobtracker.query("SELECT * from job_files " "WHERE job_id=%d" % row["id"])

        files_ids = [str(row2["file_id"]) for row2 in rows2]

        # Submit all parallel jobs (1 job per DDplan)
        for istep in range(len(config.searching.ddplans["nuppi"])):
            task_name = "search %d" % istep  # TODO
            queries.append(
                "INSERT INTO jobs ("
                "created_at, "
                "details, "
                "status, "
                "task, "
                "updated_at) "
                "VALUES ('%s', '%s', '%s', '%s', '%s')"
                % (jobtracker.nowstr(), "Newly created job", "new", task_name, jobtracker.nowstr())
            )

            rows = jobtracker.query("SELECT filename FROM files WHERE id IN ('%s')")

            for file_id in files_ids:
                queries.append(
                    "INSERT INTO job_files ("
                    "file_id, "
                    "created_at, "
                    "job_id, "
                    "updated_at) "
                    "SELECT id, '%s', (SELECT LAST_INSERT_ID()), '%s' "
                    "FROM files "
                    "WHERE id=%d" % (jobtracker.nowstr(), jobtracker.nowstr(), int(file_id))
                )

        # Mark the previous task as 'done'
        queries.append(
            "UPDATE jobs "
            "SET status='done', "
            "updated_at='%s', "
            "details='Processed without errors' "
            "WHERE id=%d" % (jobtracker.nowstr(), row["id"])
        )
    jobtracker.query(queries)
Example #14
def create_jobs_for_new_files():
    """Check job-tracker DB for newly downloaded files. Group
        jobs that belong to the same observation and create
        entries in the jobs table.
    """
    # Get files that were not associated with a job yet
    rows = jobtracker.query(
        "SELECT filename FROM files "
        "LEFT JOIN job_files "
        "ON job_files.file_id=files.id "
        "WHERE files.status IN ('downloaded', 'added') "
        "AND job_files.id IS NULL"
    )
    newfns = [str(row["filename"]) for row in rows]

    # Group together files that belong together
    groups = datafile.simple_group_files(newfns)

    # Keep only groups that are not missing any files
    complete_groups = [grp for grp in groups if SPAN512_job.is_complete(grp)]

    if complete_groups:
        jobpool_cout.outs("Inserting %d new entries into jobs table" % len(complete_groups))

    # Label the first task
    task_name = "rfifind"

    for complete in complete_groups:
        # Insert new job and link it to data files
        queries = []
        queries.append(
            "INSERT INTO jobs ("
            "created_at, "
            "details, "
            "status, "
            "task, "
            "updated_at) "
            "VALUES ('%s', '%s', '%s', '%s', '%s')"
            % (jobtracker.nowstr(), "Newly created job", "new", task_name, jobtracker.nowstr())
        )
        queries.append(
            "INSERT INTO job_files ("
            "file_id, "
            "created_at, "
            "job_id, "
            "updated_at) "
            "SELECT id, '%s', (SELECT LAST_INSERT_ID()), '%s' "
            "FROM files "
            "WHERE filename IN ('%s')" % (jobtracker.nowstr(), jobtracker.nowstr(), "', '".join(complete))
        )
        jobtracker.query(queries)
Example #15
File: job.py Project: plazar/pipeline2.0
def submit_jobs():
    """
    Submits jobs to the queue for processing.
    
    ***NOTE: Priority is given to jobs with status 'retrying'.
    """
    jobs = []
    jobs.extend(jobtracker.query("SELECT * FROM jobs " "WHERE status='retrying' " "ORDER BY updated_at ASC"))
    jobs.extend(jobtracker.query("SELECT * FROM jobs " "WHERE status='new'" "ORDER BY updated_at ASC"))
    for job in jobs:
        if config.jobpooler.queue_manager.can_submit():
            submit(job)
        else:
            break
Example #16
def main():
    ready_for_upload = jobtracker.query("SELECT * FROM job_uploads WHERE status='checked'")
    waiting_check = jobtracker.query("SELECT * FROM job_uploads WHERE status='new'")
    uploaded = jobtracker.query("SELECT * FROM job_uploads WHERE status='uploaded'")
    for ru in ready_for_upload:
        print "%s\t%s" % (ru['job_id'],"Checked and ready for upload.")
    for ru in waiting_check:
        print "%s\t%s" % (ru['job_id'],"Processed and ready to be checked.")
    for ru in uploaded:
        print "%s\t%s" % (ru['job_id'],"Processed and Uploaded.")

    print "\nNum. of jobs ready for    upload: %u" % len(ready_for_upload)
    print "Num. of jobs waiting for dry-run: %u" % len(waiting_check)
    print "Num. of uploaded jobs : %u" % len(uploaded)
Example #17
def mark_finished_beams():
    """
    """
    finished_jobs = jobtracker.query("SELECT * FROM jobs WHERE " "task like 'folding%' AND status='processed'")

    queries = []
    for finished_job in finished_jobs:
        queries.append(
            "UPDATE jobs "
            "SET status='finished', "
            "updated_at='%s', "
            "details='Finished without error' "
            "WHERE id=%d" % (jobtracker.nowstr(), finished_job["id"])
        )
    jobtracker.query(queries)
Example #18
def show_status(scr):
    active_downloads = jobtracker.query("SELECT * FROM files " \
                                        "WHERE status IN ('downloading', " \
                                                         "'unverified', " \
                                                         "'new') " \
                                        "ORDER BY created_at ASC")
    scr.clear()
    maxy, maxx = scr.getmaxyx()
    scr.addstr(0,0, "Number of active downloads: %d" % len(active_downloads), \
                curses.A_BOLD | curses.A_UNDERLINE)

    for ii, file in enumerate(active_downloads[:(maxy-2)/2-1]):
        fn = os.path.split(file['filename'])[-1]
        scr.addstr(2+ii*2, 0, fn, curses.A_BOLD)
        scr.addstr(2+ii*2, len(fn), " - %s" % file['status'])
        if os.path.exists(file['filename']):
            currsize = os.path.getsize(file['filename'])
        else:
            currsize = 0
        pcnt_complete = float(currsize)/file['size']
        scr.addstr(3+ii*2, 0, "[")
        scr.addstr(3+ii*2, 1, "="*int(pcnt_complete*(maxx-12))+">")
        scr.addstr(3+ii*2, maxx-10, "]")
        scr.addstr(3+ii*2, maxx-8, "(%5.1f%%)" % (pcnt_complete*100))

    scr.refresh()
Example #19
    def update(self):
        active_downloads = jobtracker.query("SELECT * FROM files " \
                                            "WHERE status IN ('downloading', " \
                                                             "'unverified', " \
                                                             "'new') " \
                                            "ORDER BY created_at ASC")
        for dl in active_downloads:
            found = False
            for f in self:
                if dl['filename'] == f.name:
                    f.status=dl['status']
                    found = True
            if not found:
                self.append(File(dl['filename'], dl['size'], dl['status'], dl['updated_at']))

        # Iterate over a copy: removing items from the list being
        # enumerated would skip elements.
        for f in self[:]:
            found = False
            for dl in active_downloads:
                if dl['filename'] == f.name:
                    found = True
                    if dl['status'] == 'downloading':
                        t = time.strptime(dl['updated_at'], '%Y-%m-%d %H:%M:%S')
                        f.starttime = time.mktime(t)
            if not found:
                self.remove(f)
            else:
                f.update()
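
The update() method above assumes list items exposing a small File
interface. A hypothetical sketch of that class (names and behavior
assumed, not from the source project):

import os

class File(object):
    def __init__(self, name, size, status, updated_at):
        self.name = name              # filename on disk
        self.size = size              # expected size in bytes
        self.status = status          # mirrors the files table status
        self.updated_at = updated_at
        self.starttime = None         # set once the download is active

    def update(self):
        # e.g. refresh the current on-disk size for a progress display
        self.currsize = os.path.getsize(self.name) \
                            if os.path.exists(self.name) else 0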
Example #20
def create_sifting_jobs():
    """Check job-tracker DB for processed jobs. Submit 
	successive jobs and create
        entries in the jobs table.
    """

    # First make sure that all plans are done
    rows = jobtracker.query("SELECT * from jobs " "WHERE status='processed' " "AND task LIKE 'search%'")
    # TODO: how to find out that the parallel tasks are done?

    rows = jobtracker.query(
        "SELECT jobs.task, job_files.file_id  FROM jobs "
        "LEFT JOIN job_files "
        "ON job_files.job_id=jobs.id "
        "WHERE jobs.status='processed' AND 'search' LIKE jobs.task"
    )
Example #21
def download(attempt):
    """Given a row from the job-tracker's download_attempts table,
        actually attempt the download.
    """
    file = jobtracker.query("SELECT * FROM files " \
                            "WHERE id=%d" % attempt['file_id'], \
                            fetchone=True)
    request = jobtracker.query("SELECT * FROM requests " \
                               "WHERE id=%d" % file['request_id'], \
                               fetchone=True)

    queries = []

    # Download using bbftp
    res = DownloaderSPAN512.exec_download(request, file)


    # bbftp should report 'get filename OK' if the transfer is successful
    if res == 'OK':
        queries.append("UPDATE files " \
                       "SET status='unverified', " \
                            "updated_at='%s', " \
                            "details='Download is complete - File is unverified' " \
                       "WHERE id=%d" % \
                       (jobtracker.nowstr(), file['id']))
        queries.append("UPDATE download_attempts " \
                       "SET status='complete', " \
                            "details='Download is complete', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" % \
                       (jobtracker.nowstr(), attempt['id']))
    else:
        queries.append("UPDATE files " \
                       "SET status='failed', " \
                            "updated_at='%s', " \
                            "details='Download failed - %s' " \
                       "WHERE id=%d" % \
                       (jobtracker.nowstr(), str(res), file['id']))
        queries.append("UPDATE download_attempts " \
                       "SET status='download_failed', " \
                            "details='Download failed - %s', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" % \
                       (str(res), jobtracker.nowstr(), attempt['id']))

    jobtracker.query(queries)
Example #22
def run():
    """
    Drives the process of uploading results of the completed jobs.

    """
    query = "SELECT * FROM jobs " \
            "WHERE status='finished'"
    finished_jobs = jobtracker.query(query)
    print "Found %d finished jobs waiting for upload" % len(finished_jobs)
    for ii, job in enumerate(finished_jobs):
        # Get the job's most recent submit
        submit = jobtracker.query("SELECT * FROM job_submits " \
                                  "WHERE job_id=%d " \
                                    "AND status='processed' " \
                                  "ORDER BY id DESC" % job['id'], fetchone=True)
        print "Upload %d of %d" % (ii+1, len(finished_jobs))
        upload_results(submit)
Example #23
def main():
    downloading = jobtracker.query(
        "SELECT * FROM files, download_attempts "
        "WHERE download_attempts.status='downloading' "
        "AND files.id=download_attempts.file_id"
    )
    for download in downloading:
        print "%s\t\t%s" % (download['remote_filename'], download['details'])

    print "\nTotal: %u" % len(downloading)
Example #24
def run():
    """
    Drives the process of uploading results of the completed jobs.

    """
    query = "SELECT * FROM jobs " \
            "WHERE status='processed'"
    processed_jobs = jobtracker.query(query)
    print "Found %d processed jobs waiting for upload" % len(processed_jobs)
    for ii, job in enumerate(processed_jobs):
        # Get the job's most recent submit
        submit = jobtracker.query("SELECT * FROM job_submits " \
                                  "WHERE job_id=%d " \
                                    "AND status='processed' " \
                                  "ORDER BY id DESC" % job['id'], fetchone=True)
        print "Upload %d of %d" % (ii + 1, len(processed_jobs))
        upload_results(submit)
Example #25
def main():
    for queue_id in args:
        job_submits = jobtracker.query("SELECT id, job_id, queue_id " \
                                       "FROM job_submits " \
                                       "WHERE queue_id LIKE '%s'" % queue_id)
        if len(job_submits) != 1:
            sys.stderr.write("Bad number (%d) of job submissions for queue " \
                                "ID provided: %s\n" % (len(job_submits), queue_id))
        elif config.jobpooler.queue_manager.is_running(
                job_submits[0]['queue_id']):
            print "Stopping job: %s" % job_submits[0]['queue_id']

            queries = []
            if options.fail:
                queries.append("UPDATE job_submits " \
                               "SET status='stopped', " \
                                    "updated_at='%s', " \
                                    "details='Job was manually failed' " \
                               "WHERE id=%d" % \
                                (jobtracker.nowstr(), job_submits[0]['id']))
                queries.append("UPDATE jobs " \
                               "SET status='failed', " \
                                    "updated_at='%s', " \
                                    "details='Job was manually failed' " \
                               "WHERE id=%d" % \
                                (jobtracker.nowstr(), job_submits[0]['job_id']))
            else:
                queries.append("DELETE FROM job_submits " \
                               "WHERE id=%d" % job_submits[0]['id'])
                queries.append("UPDATE jobs " \
                               "SET status='retrying', " \
                                    "updated_at='%s', " \
                                    "details='Job was manually removed, politely' " \
                               "WHERE id=%d" % \
                                (jobtracker.nowstr(), job_submits[0]['job_id']))
            jobtracker.query(queries)
            try:
                config.jobpooler.queue_manager.delete(
                    job_submits[0]['queue_id'])
            except pipeline_utils.PipelineError, e:
                print "PipelineError: %s" % str(e)

        else:
            sys.stderr.write("There is no job currently in the queue with " \
                                "the ID provided: %s\n" % job_submits[0]['queue_id'])
Example #26
def status(log=True):
    """
    Displays number of jobs processed, uploaded, waiting, waiting retry, failed.

    Input(s):
        Optional:
            log : Defaults to True; writes to the configured log file,
                    otherwise only prints the information to stdout
    Output(s):
        Displays number of jobs processed, uploaded, waiting, waiting retry, failed.
    """
    running_jobs = jobtracker.query("SELECT count(*) FROM jobs WHERE status='submitted'")
    processed_jobs = jobtracker.query("SELECT count(*) FROM jobs WHERE status='processed'")
    uploaded_jobs = jobtracker.query("SELECT count(*) FROM jobs WHERE status='uploaded' and updated_at>'2019-08-15 00:00:00' ")
    new_jobs = jobtracker.query("SELECT count(*) FROM jobs WHERE status='new'")
    failed_jobs = jobtracker.query("SELECT count(*) FROM jobs WHERE status='failed'")
    retrying_jobs = jobtracker.query("SELECT count(*) FROM jobs WHERE status='retrying'")
    dead_jobs = jobtracker.query("SELECT count(*) FROM jobs WHERE status='terminal_failure'")
    status_str= "\n\n================= Job Pool Status ==============\n"
    status_str+="Num. of jobs            running: %d\n" % running_jobs[0][0]
    status_str+="Num. of jobs          processed: %d\n" % processed_jobs[0][0]
    status_str+="Num. of jobs   uploaded(beluga): %d\n" % uploaded_jobs[0][0]
    status_str+="Num. of jobs            waiting: %d\n" % new_jobs[0][0]
    status_str+="Num. of jobs      waiting retry: %d\n" % retrying_jobs[0][0]
    status_str+="Num. of jobs             failed: %d\n" % failed_jobs[0][0]
    status_str+="Num. of jobs permanently failed: %d\n" % dead_jobs[0][0]
    if log:
        jobpool_cout.outs(status_str)
    else:
        print status_str
Example #27
File: job.py Project: sn1p3/pipeline2.0
def status(log=True):
    """
    Displays number of jobs processed, uploaded, waiting, waiting retry, failed.

    Input(s):
        Optional:
            log : Defaults to True; writes to the configured log file,
                    otherwise only prints the information to stdout
    Output(s):
        Displays number of jobs processed, uploaded, waiting, waiting retry, failed.
    """
    running_jobs = jobtracker.query("SELECT * FROM jobs WHERE status='submitted'")
    processed_jobs = jobtracker.query("SELECT * FROM jobs WHERE status='processed'")
    uploaded_jobs = jobtracker.query("SELECT * FROM jobs WHERE status='uploaded'")
    new_jobs = jobtracker.query("SELECT * FROM jobs WHERE status='new'")
    failed_jobs = jobtracker.query("SELECT * FROM jobs WHERE status='failed'")
    retrying_jobs = jobtracker.query("SELECT * FROM jobs WHERE status='retrying'")
    dead_jobs = jobtracker.query("SELECT * FROM jobs WHERE status='terminal_failure'")

    status_str= "\n\n================= Job Pool Status ==============\n"
    status_str+="Num. of jobs            running: %d\n" % len(running_jobs)
    status_str+="Num. of jobs          processed: %d\n" % len(processed_jobs)
    status_str+="Num. of jobs           uploaded: %d\n" % len(uploaded_jobs)
    status_str+="Num. of jobs            waiting: %d\n" % len(new_jobs)
    status_str+="Num. of jobs      waiting retry: %d\n" % len(retrying_jobs)
    status_str+="Num. of jobs             failed: %d\n" % len(failed_jobs)
    status_str+="Num. of jobs permanently failed: %d\n" % len(dead_jobs)
    if log:
        jobpool_cout.outs(status_str)
    else:
        print status_str
Example #28
def make_request(num_beams=None):
    """Make a request for data to be restored by connecting to the
        web services at Cornell.
    """
    if not num_beams:
        num_beams = get_num_to_request()
        if not num_beams:
            # Request size is 0
            return
    dlm_cout.outs("Requesting data\nIssuing a request of size %d" % num_beams)

    web_service = CornellWebservice.Client()
    guid = web_service.Restore(username=config.download.api_username, \
                               pw=config.download.api_password, \
                               pipeline=config.basic.pipeline.lower(), \
                               number=num_beams, \
                               bits=config.download.request_numbits, \
                               fileType=config.download.request_datatype)
    print "guid:", guid
    if guid == "fail":
        raise pipeline_utils.PipelineError(
            "Request for restore returned 'fail'.")

    requests = jobtracker.query("SELECT * FROM requests " \
                             "WHERE guid='%s'" % guid)
    if requests:
        # Entries in the requests table exist with this GUID!?
        raise pipeline_utils.PipelineError("There are %d requests in the " \
                                           "job-tracker DB with this GUID %s" % \
                                           (len(requests), guid))

    jobtracker.query("INSERT INTO requests ( " \
                        "numbits, " \
                        "numrequested, " \
                        "file_type, " \
                        "guid, " \
                        "created_at, " \
                        "updated_at, " \
                        "status, " \
                        "details) " \
                     "VALUES (%d, %d, '%s', '%s', '%s', '%s', '%s', '%s')" % \
                     (config.download.request_numbits, num_beams, \
                        config.download.request_datatype, guid, \
                        jobtracker.nowstr(), jobtracker.nowstr(), 'waiting', \
                        'Newly created request'))
Example #29
File: job.py Project: kstovall/pipeline2.0
def submit_jobs():
    """
    Submits jobs to the queue for processing.
    
    ***NOTE: Priority is given to jobs with status 'retrying'.
    """
    jobs = []
    jobs.extend(jobtracker.query("SELECT * FROM jobs " \
                                 "WHERE status='retrying' " \
                                 "ORDER BY updated_at ASC"))
    jobs.extend(jobtracker.query("SELECT * FROM jobs " \
                                 "WHERE status='new'" \
                                 "ORDER BY updated_at ASC"))
    for job in jobs:
        if config.jobpooler.queue_manager.can_submit():
            submit(job)
        else:
            break
Example #30
File: job.py Project: kstovall/pipeline2.0
def create_jobs_for_new_files():
    """Check job-tracker DB for newly downloaded files. Group
        jobs that belong to the same observation and create
        entries in the jobs table.
    """
    # Get files that aren't already associated with a job
    rows = jobtracker.query("SELECT filename FROM files " \
                            "LEFT JOIN job_files " \
                                "ON job_files.file_id=files.id " \
                            "WHERE files.status='downloaded' " \
                                "AND job_files.id IS NULL")
    newfns = [str(row['filename']) for row in rows]

    # Group together files that belong together
    groups = datafile.group_files(newfns)

    # Keep only groups that are not missing any files
    complete_groups = [grp for grp in groups if datafile.is_complete(grp)]

    if complete_groups:
        jobpool_cout.outs("Inserting %d new entries into jobs table" % \
                            len(complete_groups))
    for complete in complete_groups:
        # Insert new job and link it to data files
        queries = []
        queries.append("INSERT INTO jobs (" \
                            "created_at, " \
                            "details, " \
                            "status, " \
                            "updated_at) " \
                       "VALUES ('%s', '%s', '%s', '%s')" % \
                        (jobtracker.nowstr(), 'Newly created job', \
                            'new', jobtracker.nowstr()))
        queries.append("INSERT INTO job_files (" \
                            "file_id, " \
                            "created_at, " \
                            "job_id, " \
                            "updated_at) " \
                       "SELECT id, '%s', (SELECT LAST_INSERT_ROWID()), '%s' " \
                       "FROM files " \
                       "WHERE filename IN ('%s')" % \
                       (jobtracker.nowstr(), jobtracker.nowstr(), \
                        "', '".join(complete)))
        jobtracker.query(queries)
Example #31
def main():
    for queue_id in args:
        job_submits = jobtracker.query("SELECT id, job_id, queue_id " \
                                       "FROM job_submits " \
                                       "WHERE queue_id LIKE '%s'" % queue_id)
        if len(job_submits) != 1:
            sys.stderr.write("Bad number (%d) of job submissions for queue " \
                                "ID provided: %s\n" % (len(job_submits), queue_id))
        elif config.jobpooler.queue_manager.is_running(job_submits[0]['queue_id']):
            print "Stopping job: %s" % job_submits[0]['queue_id']

            queries = []
            if options.fail:
                queries.append("UPDATE job_submits " \
                               "SET status='stopped', " \
                                    "updated_at='%s', " \
                                    "details='Job was manually failed' " \
                               "WHERE id=%d" % \
                                (jobtracker.nowstr(), job_submits[0]['id']))
                queries.append("UPDATE jobs " \
                               "SET status='failed', " \
                                    "updated_at='%s', " \
                                    "details='Job was manually failed' " \
                               "WHERE id=%d" % \
                                (jobtracker.nowstr(), job_submits[0]['job_id']))
            else:
                queries.append("DELETE FROM job_submits " \
                               "WHERE id=%d" % job_submits[0]['id'])
                queries.append("UPDATE jobs " \
                               "SET status='retrying', " \
                                    "updated_at='%s', " \
                                    "details='Job was manually removed, politely' " \
                               "WHERE id=%d" % \
                                (jobtracker.nowstr(), job_submits[0]['job_id']))
            jobtracker.query(queries)
            try:
                config.jobpooler.queue_manager.delete(job_submits[0]['queue_id'])
            except pipeline_utils.PipelineError, e:
                print "PipelineError: %s" % str(e)

        else:
            sys.stderr.write("There is no job currently in the queue with " \
                                "the ID provided: %s\n" % job_submits[0]['queue_id'])
Example #32
def remove_file(fn):
    """Delete a file (if it exists) and mark it as deleted in the 
        job-tracker DB.

        Input:
            fn: The name of the file to remove.

        Outputs:
            None
    """
    if os.path.exists(fn):
        os.remove(fn)
        print "Deleted: %s" % fn
    jobtracker.query("UPDATE files " \
                     "SET status='deleted', " \
                         "updated_at='%s', " \
                         "details='File was deleted' " \
                     "WHERE filename='%s'" % \
                     (jobtracker.nowstr(), fn))
Example #33
def main():
    failed_jobs = jobtracker.query("SELECT id, updated_at FROM jobs " \
                                   "WHERE status='terminal_failure'" \
                                   "ORDER BY updated_at ASC")
    for job in failed_jobs:
        last_submit = jobtracker.query("SELECT id, job_id, status, details, updated_at " \
                                       "FROM job_submits " \
                                       "WHERE job_id=%d " \
                                       "ORDER BY updated_at DESC" % job['id'], \
                                       fetchone=True)
        headerline = "========== Job ID: %d, last job submission ID: %d ==========" % \
            (last_submit['job_id'], last_submit['id'])
        print headerline
        print "Last job submission status: %s (%s)" % \
            (last_submit['status'], last_submit['updated_at'])
        if options.full:
            print last_submit['details']
        print "-"*len(headerline)
        print ""
Example #34
def main():
    parser = OptionParser(usage)

    parser.add_option(
        "-d",
        "--days",
        type="float",
        dest="nb_days",
        default=60.0,
        help="Minimum number of days for plannified observations (default=60)")

    (opts, args) = parser.parse_args()

    QUERY = "SELECT * FROM Headers AS H LEFT JOIN PDM_Candidates AS C ON \
	H.header_id=C.header_id LEFT JOIN PDM_Classifications AS R ON \
	R.pdm_cand_id=C.pdm_cand_id WHERE R.person_id=2 AND R.rank=6;"

    results = jobtracker.query(QUERY, db="remote-SPAN512")
    print "Found %d known pulsars in the database" % (len(results))

    Known_psrs = PSR_list()
    Known_psrs.load_known_psrs()
    print "Read %d known pulsars from %s" % (len(Known_psrs),
                                             KNOWN_PSR_filename)

    psrnames = np.array([])
    snr = np.array([])
    sig = np.array([])

    for res in results:
        #print res
        print "SRC = %s  Period = %s  Tint = %s  SNR = %s  Sigma = %s" % (
            res['source_name'], res['bary_period'], res['observation_time'],
            res['snr'], res['rescaled_prepfold_sigma'])
        ra_rad, dec_rad = slalib.sla_galeq(
            res['galactic_longitude'] * np.pi / 180.,
            res['galactic_latitude'] * np.pi / 180.)

        psr_id = Known_psrs.find_known_psrs(ra_rad, dec_rad)

        if psr_id:
            print Known_psrs.show_known_psrs(psr_id)
            print " "

            snr = np.append(snr, res['snr'])
            psrnames = np.append(psrnames, Known_psrs.get_psrname(psr_id))

        else:
            print "None found\n"

    plot(snr, 'ro')
    xticks(np.arange(len(psrnames)), psrnames, rotation=90)
    tight_layout(pad=1.2)
    ylabel('SNR')
    show()
Example #35
def start_downloads():
    """Check for entries in the files table with status 'retrying'
        or 'new' and start the downloads.
    """
    todownload  = jobtracker.query("SELECT * FROM files " \
                                   "WHERE status='retrying' " \
                                   "ORDER BY created_at ASC")
    todownload += jobtracker.query("SELECT * FROM files " \
                                   "WHERE status='new' " \
                                   "ORDER BY created_at ASC")

    for file in todownload:
        if can_download():
            dlm_cout.outs("Initiating download of %s" % \
                            os.path.split(file['filename'])[-1])

            # Update file status and insert entry into download_attempts
            queries = []
            queries.append("UPDATE files " \
                           "SET status='downloading', " \
                                "details='Initiated download', " \
                                "updated_at='%s' " \
                            "WHERE id=%d" % \
                            (jobtracker.nowstr(), file['id']))
            queries.append("INSERT INTO download_attempts (" \
                                "status, " \
                                "details, " \
                                "updated_at, " \
                                "created_at, " \
                                "file_id) " \
                           "VALUES ('%s', '%s', '%s', '%s', %d)" % \
                           ('downloading', 'Initiated download', jobtracker.nowstr(), \
                                jobtracker.nowstr(), file['id']))
            insert_id = jobtracker.query(queries)
            attempt = jobtracker.query("SELECT * FROM download_attempts " \
                                       "WHERE id=%d" % insert_id, \
                                       fetchone=True)
    
            # download(attempt)
            DownloadThread(attempt).start()
        else:
            break
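
start_downloads() gates each new download on can_download(), which is
not shown in these examples. A plausible sketch, assuming it reuses the
disk-space accounting seen in Example #5 (an assumption, not source
code):

def can_download():
    """Hypothetical: allow another download while used plus committed
        space stays under the configured quota."""
    return (get_space_used() + get_space_committed()
            < config.download.space_to_use)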
Example #39
0
def get_data():
    create_times = jobtracker.query("SELECT DATETIME(created_at) FROM jobs")

    upload_times = jobtracker.query("SELECT DATETIME(updated_at) FROM jobs " \
                                        "WHERE status='uploaded'")

    fail_times = jobtracker.query("SELECT DATETIME(updated_at) FROM jobs " \
                                        "WHERE status='terminal_failure'")

    restore_times = jobtracker.query(
        "SELECT DATETIME(created_at) FROM requests")
    restore_sizes = jobtracker.query("SELECT numrequested FROM requests")
    rows = jobtracker.query("SELECT status FROM requests")
    restore_status = []
    for r in rows:
        if r['status'] == 'cleaned_up':
            restore_status.append((0, 1, 0))  # Green
        elif r['status'] == 'failed':
            restore_status.append((1, 0, 0))  # Red
        else:
            restore_status.append((0, 0, 1))  # Blue

    bytes_downloaded = jobtracker.query("SELECT files.size, " \
                                            "MAX(DATETIME(download_attempts.updated_at)) " \
                                        "FROM files, download_attempts " \
                                        "WHERE files.id=download_attempts.file_id " \
                                            "AND download_attempts.status IN ('downloaded', 'added') " \
                                        "GROUP BY files.id")
    bytes_deleted = jobtracker.query("SELECT -size, DATETIME(updated_at) " \
                                          "FROM files " \
                                          "WHERE status='deleted'")

    mkdatetime = lambda dt: datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')

    create_times = np.asarray(
        sorted([mkdatetime(row[0]) for row in create_times]))
    upload_times = np.asarray(
        sorted([mkdatetime(row[0]) for row in upload_times]))
    fail_times = np.asarray(sorted([mkdatetime(row[0]) for row in fail_times]))

    restore_times = np.asarray([mkdatetime(row[0]) for row in restore_times])
    restore_sizes = np.asarray([row[0] for row in restore_sizes])

    bytes_times = bytes_downloaded + bytes_deleted
    bytes = np.asarray([row[0] for row in bytes_times])
    times = np.asarray([mkdatetime(row[1]) for row in bytes_times])
    isort = np.argsort(times)
    times = times[isort]
    bytes = np.cumsum(bytes[isort])

    return create_times, upload_times, fail_times, bytes, times, \
            restore_times, restore_sizes, restore_status
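A hedged usage sketch for the arrays returned above (the plotting code is an assumption; this example only assembles the data): the cumulative byte counts can be drawn as a step function over time.

import matplotlib.pyplot as plt

create_times, upload_times, fail_times, bytes, times, \
        restore_times, restore_sizes, restore_status = get_data()

plt.step(times, bytes/1024.0**3, where='post')
plt.xlabel("Time")
plt.ylabel("Disk usage (GB)")
plt.show()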
Example #40
0
def remove_file(fn):
    """Delete a file (if it exists) and mark it as deleted in the 
        job-tracker DB.

        Input:
            fn: The name of the file to remove.

        Outputs:
            None
    """
    import jobtracker
    if os.path.exists(fn):
        os.remove(fn)
        print "Deleted: %s" % fn
    jobtracker.query("UPDATE files " \
                     "SET status='deleted', " \
                         "updated_at='%s', " \
                         "details='File was deleted' " \
                     "WHERE filename='%s'" % \
                     (jobtracker.nowstr(), fn))
Example #41
0
def get_data():
    create_times = jobtracker.query("SELECT (created_at) FROM jobs")

    upload_times = jobtracker.query("SELECT (updated_at) FROM jobs " \
                                        "WHERE status='uploaded'")

    fail_times = jobtracker.query("SELECT (updated_at) FROM jobs " \
                                        "WHERE status='terminal_failure'")

    restore_times = jobtracker.query("SELECT (created_at) FROM requests")
    restore_sizes = jobtracker.query("SELECT numrequested FROM requests")
    rows = jobtracker.query("SELECT status FROM requests")
    restore_status = []
    for r in rows:
        if r['status'] == 'cleaned_up':
            restore_status.append((0,1,0)) # Green
        elif r['status'] == 'failed':
            restore_status.append((1,0,0)) # Red
        else:
            restore_status.append((0,0,1)) # Blue

    bytes_downloaded = jobtracker.query("SELECT files.size, " \
                                            "MAX((download_attempts.updated_at)) " \
                                        "FROM files, download_attempts " \
                                        "WHERE files.id=download_attempts.file_id " \
                                            "AND download_attempts.status IN ('downloaded', 'added') " \
                                        "GROUP BY files.id")
    bytes_deleted = jobtracker.query("SELECT -size, (updated_at) " \
                                          "FROM files " \
                                          "WHERE status='deleted'")

    print bytes_downloaded, bytes_deleted

    mkdatetime = lambda dt: datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')

    create_times = np.asarray(sorted([mkdatetime(row[0]) for row in create_times]))
    upload_times = np.asarray(sorted([mkdatetime(row[0]) for row in upload_times]))
    fail_times = np.asarray(sorted([mkdatetime(row[0]) for row in fail_times]))

    restore_times = np.asarray([mkdatetime(row[0]) for row in restore_times])
    restore_sizes = np.asarray([row[0] for row in restore_sizes])

    bytes_times = bytes_downloaded + bytes_deleted
    bytes = np.asarray([row[0] for row in bytes_times])
    times = np.asarray([mkdatetime(row[1]) for row in bytes_times])
    isort = np.argsort(times)
    times = times[isort]
    bytes = np.cumsum(bytes[isort])

    return create_times, upload_times, fail_times, bytes, times, \
            restore_times, restore_sizes, restore_status
Example #42
0
def check_active_requests():
    """Check for any requests with status='waiting'. If there are
        some, check if the files are ready for download.
    """

    active_requests = jobtracker.query("SELECT * FROM requests " \
                                       "WHERE status='waiting'")
    for request in active_requests:
        # Check the status of the request
        if DownloaderSPAN512.check_request_done(request):
            dlm_cout.outs("Restore (GUID: %s) has succeeded. " \
                            "Will create file entries.\n" % request['guid'])
            create_file_entries(request)
        else:
            query = "SELECT (TO_SECONDS('%s')-TO_SECONDS(created_at)) " \
                        "AS deltaT_seconds " \
                    "FROM requests " \
                    "WHERE guid='%s'" % \
                        (jobtracker.nowstr(), request['guid'])
            row = jobtracker.query(query, fetchone=True)
            if row['deltaT_seconds']/3600. > config.download.request_timeout:
                dlm_cout.outs("Restore (GUID: %s) is over %d hr old " \
                                "and still not ready. Marking " \
                                "it as failed." % \
                        (request['guid'], config.download.request_timeout))
                jobtracker.query("UPDATE requests " \
                                 "SET status='failed', " \
                                    "details='Request took too long (> %d hr)', " \
                                    "updated_at='%s' " \
                                 "WHERE guid='%s'" % \
                    (config.download.request_timeout, jobtracker.nowstr(), \
                            request['guid']))
Example #43
0
def check_active_requests():
    """Check for any requests with status='waiting'. If there are
        some, check if the files are ready for download.
    """
    active_requests = jobtracker.query("SELECT * FROM requests " \
                                       "WHERE status='waiting'")
    if not active_requests:
        print "Checking active requests: no active requests"

    web_service = CornellWebservice.Client()
    for request in active_requests:
        location = web_service.Location(guid=request['guid'], \
                                        username=config.download.api_username, \
                                        pw=config.download.api_password)
        print "location:", location, "\t\t", " guid:", request['guid']
        if location == "done":
            dlm_cout.outs("Restore (%s) is done. Will create file entries." % \
                            request['guid'])
            create_file_entries(request)
        else:

            query = "SELECT (julianday('%s')-julianday(created_at))*24 " \
                        "AS deltaT_hours " \
                    "FROM requests " \
                    "WHERE guid='%s'" % \
                        (jobtracker.nowstr(), request['guid'])

            row = jobtracker.query(query, fetchone=True)
            if row['deltaT_hours'] > config.download.request_timeout:
                dlm_cout.outs("Restore (%s) is over %d hr old " \
                                "and still not ready. Marking " \
                                "it as failed." % \
                        (request['guid'], config.download.request_timeout))
                jobtracker.query("UPDATE requests " \
                                 "SET status='failed', " \
                                    "details='Request took too long (> %d hr)', " \
                                    "updated_at='%s' " \
                                 "WHERE guid='%s'" % \
                    (config.download.request_timeout, jobtracker.nowstr(), \
                            request['guid']))
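The julianday() arithmetic above is SQLite-specific, just as the TO_SECONDS() call in the previous example is MySQL-specific. If portability mattered, the same age check could be done in Python after fetching created_at, as in this sketch (request_age_hours is a hypothetical helper):

import datetime

def request_age_hours(created_at):
    """Hours elapsed since 'created_at', a timestamp string in
        the 'YYYY-MM-DD HH:MM:SS' format used throughout these
        examples.
    """
    created = datetime.datetime.strptime(created_at, '%Y-%m-%d %H:%M:%S')
    delta = datetime.datetime.now() - created
    return delta.total_seconds()/3600.0

The timeout test would then read: if request_age_hours(request['created_at']) > config.download.request_timeout: ...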
Example #44
0
def recover_failed_downloads():
    """For each entry in the job-tracker DB's files table
        check if the download can be retried or not.
        Update status and clean up, as necessary.
    """
    failed_files = jobtracker.query("SELECT * FROM files " \
                                   "WHERE status='failed'")

    for file in failed_files:
        attempts = jobtracker.query("SELECT * FROM download_attempts " \
                                    "WHERE download_id=%d" % file['id'])
        if len(attempts) < config.download.numretries:
            # download can be retried
            jobtracker.query("UPDATE files " \
                             "SET status='retrying', " \
                                  "updated_at='%s', " \
                                  "details='Download will be attempted again' " \
                             "WHERE id=%s" % \
                             (jobtracker.nowstr(), file['id']))
        else:
            # Abandon this file
            if os.path.exists(file['filename']):
                os.remove(file['filename'])
            jobtracker.query("UPDATE files " \
                             "SET status='terminal_failure', " \
                                  "updated_at='%s', " \
                                  "details='This file has been abandoned' " \
                             "WHERE id=%s" % \
                             (jobtracker.nowstr(), file['id']))
Example #45
0
def submit_jobs():
    """
    Submits jobs to the queue for processing.
    
    ***NOTE: Priority is given to jobs with status 'retrying'.
    """
    jobs = []

    special_query1 = "SELECT distinct j.* FROM jobs AS j JOIN job_files AS jf " \
                    "ON j.id=jf.job_id JOIN files AS f ON f.id=jf.file_id WHERE " \
                    "j.status in ('new','retrying') AND f.filename LIKE " \
                    "'%p2030.2019____.G__.%.fits' ORDER BY j.updated_at ASC"

    special_query2 = "SELECT distinct j.* FROM jobs AS j JOIN job_files AS jf " \
                    "ON j.id=jf.job_id JOIN files AS f ON f.id=jf.file_id WHERE " \
                    "j.status in ('new','retrying') AND f.filename LIKE " \
                    "'%p2030.2018____.G__.%.fits' ORDER BY j.updated_at ASC"

    special_query3 = "SELECT distinct j.* FROM jobs AS j JOIN job_files AS jf " \
                    "ON j.id=jf.job_id JOIN files AS f ON f.id=jf.file_id WHERE " \
                    "j.status in ('new','retrying') AND f.filename LIKE " \
                    "'%p2030.2019____.G___.%.fits' ORDER BY j.updated_at ASC"

    special_query4 = "SELECT distinct j.* FROM jobs AS j JOIN job_files AS jf " \
                    "ON j.id=jf.job_id JOIN files AS f ON f.id=jf.file_id WHERE " \
                    "j.status in ('new','retrying') AND f.filename LIKE " \
                    "'%p2030.2018____.G___.%.fits' ORDER BY j.updated_at ASC"


    if not len(jobs):
        jobs.extend(jobtracker.query(special_query1))
        print len(jobs),"in inner galaxy (2019 data)."
        jobs.extend(jobtracker.query(special_query2))
        print len(jobs),"in inner galaxy (2018 data)."
    if len(jobs)<50:
        jobs.extend(jobtracker.query(special_query3))
        print len(jobs),"in outer galaxy (2019 data)."
        jobs.extend(jobtracker.query(special_query4))
        print len(jobs),"in outer galaxy (2018 data)."
    if len(jobs)<50:
        jobs.extend(jobtracker.query("SELECT * FROM jobs " \
                                     "WHERE status='retrying' " \
                                     "ORDER BY updated_at ASC"))
        jobs.extend(jobtracker.query("SELECT * FROM jobs " \
                                     "WHERE status='new'" \
                                     "ORDER BY updated_at ASC"))

    for job in jobs[:500]:
        if config.jobpooler.queue_manager.can_submit():
            try:
                submit(job)
                if config.jobpooler.submit_sleep:
                   time.sleep(config.jobpooler.submit_sleep)
                   #time.sleep(5)
            except:
                fn = pipeline_utils.get_fns_for_jobid(job['id'])[0]
                fn = fn.split('results/')[-1]
                print "Skipping job-id: %d (%s)" % (job['id'], fn)
        else:
            break
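In the SQL LIKE patterns above, '_' matches exactly one character and '%' any run of characters, so '%p2030.2019____.G__.%.fits' selects 2019 files with a four-character date field and a two-character Galactic-longitude label (which the prints call the inner galaxy), while 'G___' matches three-character labels (outer galaxy). A sketch of the equivalent filename test in Python, assuming this reading of the patterns:

import fnmatch

def is_inner_galaxy_2019(filename):
    """Mirror of the SQL pattern '%p2030.2019____.G__.%.fits':
        fnmatch's '?' plays the role of SQL's '_' and '*' of '%'.
    """
    return fnmatch.fnmatch(filename, '*p2030.2019????.G??.*.fits')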
Example #46
0
def main():
    fns = set(args)
    fns.update(options.files)

    for fn in fns:
        fn = os.path.abspath(fn)
        rows = jobtracker.query("SELECT * FROM files " \
                                "WHERE filename='%s' " \
                                    "AND status IN ('added', 'downloaded')" % fn)
        if not len(rows):
            print "Cannot remove %s. Either file isn't tracked, " \
                    "or it doesn't have status 'added' or 'downloaded'." % fn
            continue

        rows = jobtracker.query("SELECT * " \
                                "FROM job_files, files " \
                                "WHERE job_files.file_id=files.id " \
                                   "AND files.filename='%s'" % fn)
        if len(rows):
            print "Cannot remove %s. It is part of a job." % fn
            continue
        pipeline_utils.remove_file(fn)
Example #47
0
def remove_logs(date='2019-07-01'):
    """
	New - Emilie Parent, July 2019    
	    Removes the logs files for a jobid that was sucessfully 
            uploaded  to the database at Cornell. 
            This is to avoid accumulating files, since the number of 
            files limit is low on Beluga.
	Input:
            date: look in the database for logs of jobs updated after that date (default = July 2019, i.e. on Beluga) 
	Output:
	    -
 
    """
    curdir = os.getcwd()
    os.chdir(config.basic.qsublog_dir)

    query1 = "SELECT id FROM jobs WHERE status='processed' AND details in ('Ready for upload','Processed without errors') and updated_at>'%s 00:00:00' ORDER BY id DESC"%date
    query2 = "SELECT id FROM jobs WHERE status='uploaded' and updated_at>'%s 00:00:00' ORDER BY id DESC"%date
    rows = jobtracker.query(query1)
    rows +=jobtracker.query(query2)
    job_ids = [str(r[0]) for r in rows]
    k=0
    for j in job_ids:
	jsub = jobtracker.query("SELECT * FROM job_submits where job_id=%s ORDER BY id DESC"%j)
	queue_ids = [str(s['queue_id'])+'.*' for s in jsub]
	for q in queue_ids:
		f = glob.glob(q)
		if len(f)==2:
			os.remove(f[0])
			os.remove(f[1])
			k+=1
		elif len(f)==1:
			os.remove(f[0])
			k+=1
    print "Removed logs of %d submitted jobs"%k
    os.chdir(curdir)
Example #48
0
def get_space_committed():
    """Return space reserved to files to be downloaded.

        Inputs:
            None
        Outputs:
            reserved: Number of bytes reserved by files to be downloaded.
    """
    reserved = jobtracker.query("SELECT SUM(size) FROM files " \
                                "WHERE status IN ('downloading', 'new', " \
                                                 "'retrying', 'failed')", \
                                fetchone=True)[0]
    if reserved is None:
        reserved = 0
    return reserved
Example #49
0
def get_space_used():
    """Return space used by the download directory (config.download.temp)

    Inputs:
        None
    Output:
        used: Size of download directory (in bytes)
    """
    files = jobtracker.query("SELECT * FROM files " \
                             "WHERE status IN ('downloaded', 'unverified')")

    total_size = 0
    for file in files:
        if os.path.exists(file['filename']):
            total_size += file['size']
    return total_size
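get_space_available() is called by several snippets in this collection but never defined. A minimal sketch using os.statvfs, assuming the download directory is config.download.temp (as the docstring above suggests):

import os
import config

def get_space_available():
    """Return the number of bytes available to unprivileged users
        on the file system that holds the download directory.
    """
    s = os.statvfs(config.download.temp)
    return s.f_bavail * s.f_frsize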
Example #50
0
def get_fns_for_jobid(jobid):
    """Given a job ID number, return a list of that job's data files.

        Input:
            jobid: The ID number from the job-tracker DB to get files for.
        
        Output:
            fns: A list of data files associated with the job ID.
    """
    query = "SELECT filename " \
            "FROM files, job_files " \
            "WHERE job_files.file_id=files.id " \
                "AND job_files.job_id=%d" % jobid
    rows = jobtracker.query(query)
    fns = [str(row['filename']) for row in rows]
    return fns
Example #51
0
def create_download(file_path):
    filename = os.path.basename(file_path)
    filesize = os.path.getsize(file_path)
    query = "INSERT INTO files (" \
                "remote_filename, " \
                "filename, " \
                "status, " \
                "created_at, " \
                "updated_at, " \
                "size, " \
                "details) " \
            "VALUES ('%s','%s','%s','%s','%s',%u,'%s')" % \
            (filename, file_path, 'downloaded', jobtracker.nowstr(), \
                jobtracker.nowstr(), filesize, \
                "Manually added via add_files.py")
    return jobtracker.query(query)
Example #52
0
def can_download():
    """Return true if another download can be initiated.
        False otherwise.

        Inputs:
            None
        Output:
            can_dl: A boolean value. True if another download can be
                    initiated. False otherwise.
    """
    downloading = jobtracker.query("SELECT * FROM files " \
                                   "WHERE status='downloading'")
    numdownload = len(downloading)
    used = get_space_used()
    avail = get_space_available()

    can_dl = (numdownload < config.download.numdownloads) and \
            (avail > config.download.min_free_space) and \
            (used < config.download.space_to_use)
    return can_dl
Example #53
0
def can_request_more():
    """Returns whether Downloader can request more restores.
        This is based on took disk space allowed for downloaded
        file, disk space available on the file system, and maximum
        number of active requests allowed.

    Inputs:
        None
    Output:
        can_request: A boolean value. True if Downloader can make a request.
                        False otherwise.
    """
    active_requests = jobtracker.query("SELECT * FROM requests " \
                                       "WHERE status='waiting'")
    numactive = len(active_requests)
    used = get_space_used()
    avail = get_space_available()
    reserved = get_space_committed()

    can_request = (numactive < config.download.numrestores) and \
            (avail-reserved > config.download.min_free_space) and \
            (used+reserved < config.download.space_to_use)
    return can_request
Example #54
0
def archive_logs():
    """
	New - Emilie Parent, July 2019    
	    Removes the logs files for a jobid that was sucessfully 
            uploaded  to the database at Cornell. 
            This is to avoid accumulating files, since the number of 
            files limit is low on Beluga.
	Input:
            jobid: The ID corresponding to a row from the job_submits table.
                   The files associated to this job will be removed. (job_id,
	           unlike id, uniquely defines a beam.
	Output:
            logs_to_del: List of logs.ER and logs.OU files that can be deleted.
 
    """
    curdir = os.getcwd()
    os.chdir(config.basic.qsublog_dir)

    tar = tarfile.open('/project/ctb-vkaspi/PALFA4/archived_logs2.tar','a')

    query = "SELECT * FROM job_submits WHERE status IN "\
	    "('uploaded','processed') AND updated_at>'2019-08-01 00:00:00'"
    rows = jobtracker.query(query)
    queue_ids = [str(rows[i]['queue_id']+'.*') for i in range(len(rows))]

    for q in queue_ids:
	f = glob.glob(q)
	if len(f)==2:
		tar.add(f[0])
		tar.add(f[1])
		print "Added a job's logs to the archived logs"
		os.remove(f[0])
		os.remove(f[1])

    tar.close()
    os.chdir(curdir)
Example #55
0
def recover_failed_jobs():
    """Gather jobs with status 'failed' from the job-tracker DB.
        For each of these jobs see if it can be re-submitted.
        If it can, set the status to 'retrying'. If the
        job cannot be re-submitted, set the status to 'terminal_failure',
        and delete the raw data (if config is set for deletion).

        Depending on configurations emails may be sent.
    """
    failed_jobs = jobtracker.query("SELECT * FROM jobs " \
                                   "WHERE status='failed'")

    for job in failed_jobs:
        # Count the number of times this job has been submitted already
        submits = jobtracker.query("SELECT * FROM job_submits " \
                                   "WHERE job_id=%d " \
                                   "ORDER BY id DESC" % job['id'])

        if len(submits) < config.jobpooler.max_attempts:
            # We can re-submit this job.
            # Set status to 'retrying'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='retrying', " \
                                  "updated_at='%s', " \
                                  "details='Job will be retried' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
            jobpool_cout.outs("Job #%d will be retried." % job['id'])
        else:
            # We've run out of attempts for this job
            if config.basic.delete_rawdata:
                pipeline_utils.clean_up(job['id'])

            # Set status to 'terminal_failure'.
            jobtracker.query("UPDATE jobs " \
                             "SET status='terminal_failure', " \
                                  "updated_at='%s', " \
                                  "details='Job has failed permanently' " \
                             "WHERE id=%d" % \
                             (jobtracker.nowstr(), job['id']))
Example #56
0
def create_download(file_path):
    filename = os.path.basename(file_path)
    filesize = os.path.getsize(file_path)
    in_query = "INSERT INTO files (remote_filename,filename,status,created_at,updated_at,size,details) VALUES ('%s','%s','%s','%s','%s',%u,'%s')"\
                        % (filename,file_path,'downloaded',datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), int(filesize),"Manually added via add_files.py")
    return jobtracker.query(in_query)
Example #57
0
def get_downloads():
    files = list()
    downloads = jobtracker.query("SELECT * FROM files")
    for download in downloads:
        files.append(download['filename'])
    return files
Example #58
0
        # Update database statuses
        queries = []
        queries.append("UPDATE job_submits " \
                       "SET status='uploaded', " \
                            "details='Upload successful (header_id=%d)', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" %
                       (header_id, jobtracker.nowstr(), job_submit['id']))
        queries.append("UPDATE jobs " \
                       "SET status='uploaded', " \
                            "details='Upload successful (header_id=%d)', " \
                            "updated_at='%s' " \
                       "WHERE id=%d" % \
                       (header_id, jobtracker.nowstr(), job_submit['job_id']))
        jobtracker.query(queries)

        print "Results successfully uploaded"

        if config.basic.delete_rawdata:
            pipeline_utils.clean_up(job_submit['job_id'])

        if debug.UPLOAD:
            upload.upload_timing_summary['End-to-end'] = \
                upload.upload_timing_summary.setdefault('End-to-end', 0) + \
                (time.time()-starttime)
            print "Upload timing summary:"
            for k in sorted(upload.upload_timing_summary.keys()):
                print "    %s: %.2f s" % (k, upload.upload_timing_summary[k])
        print ""  # Just a blank line
Example #59
0
                    "queue_id TEXT, " \
                    "status TEXT, " \
                    "updated_at TEXT, " \
                    "output_dir TEXT)")
creates.append("CREATE TABLE jobs ( " \
                    "created_at TEXT, " \
                    "details TEXT, " \
                    "id INTEGER PRIMARY KEY, " \
                    "status TEXT, " \
                    "updated_at TEXT)")
creates.append("CREATE TABLE requests ( " \
                    "size INTEGER, " \
                    "numbits INTEGER, " \
                    "numrequested INTEGER, " \
                    "file_type TEXT, " \
                    "created_at TEXT, " \
                    "details TEXT, " \
                    "guid TEXT, " \
                    "id INTEGER PRIMARY KEY, " \
                    "status TEXT, " \
                    "updated_at TEXT)")

if not os.path.exists(config.background.jobtracker_db):
    print "Database file %s doesn't exist, creating a clean database." % \
                    config.background.jobtracker_db
    for table in creates:
        jobtracker.query(table)
else:
    print "Database file %s already exists. " \
            "Aborting creation of database." % config.background.jobtracker_db
Example #60
0
    """
    import jobtracker
    import datafile
    try:
        datafile_type = datafile.get_datafile_type([fn])
    except datafile.DataFileError, e:
        if verbose:
            print "Unrecognized data file type: %s" % fn
        return False
    parsedfn = datafile_type.fnmatch(fn)
    if parsedfn.groupdict().setdefault('beam', '-1') == '7':
        if verbose:
            print "Ignoring beam 7 data: %s" % fn
        return False
    # Check if file is already in the job-tracker DB
    files = jobtracker.query("SELECT * FROM files " \
                             "WHERE filename LIKE '%%%s'" % os.path.split(fn)[-1])
    if len(files):
        if verbose:
            print "File is already being tracked: %s" % fn
        return False
    return True


def execute(cmd, stdout=subprocess.PIPE, stderr=sys.stderr, dir=None): 
    """Execute the command 'cmd' after logging the command
        to STDOUT. Execute the command in the directory 'dir',
        which defaults to the current directory if not provided.

        Output standard output to 'stdout' and standard
        error to 'stderr'. Both are strings containing filenames.
        If values are None, the out/err streams are not recorded.