def check_file(fn):
    """Decide whether a file may be inserted into the job-tracker DB.

    The path must exist and be readable by the current process. When it
    is, the final decision is delegated to pipeline_utils.can_add_file,
    called with verbose=True. The original notes for this function say
    the file must also be a non-beam-7 PALFA data file that is not
    already in the job-tracker DB; those checks are presumably done by
    can_add_file, which is not visible here.

    Input:
        fn: The file to check.

    Output:
        valid: False if the path is missing or unreadable (a message is
            printed in that case); otherwise whatever
            pipeline_utils.can_add_file returns.
    """
    is_readable = os.path.exists(fn) and os.access(fn, os.R_OK)
    if not is_readable:
        # Single-argument parenthesised form prints the same text under
        # the Python 2 print statement.
        print("Not an existing readable file: %s" % fn)
        return False
    return pipeline_utils.can_add_file(fn, verbose=True)
def create_file_entries2(request, files):
    """Register the files of a request in the job-tracker DB (verbose variant).

    The FTP server is asked for the files that belong to the request's
    GUID. One INSERT into the 'files' table is queued for every file
    accepted by pipeline_utils.can_add_file, progress is printed every
    tenth file, and the request row is then marked 'downloading' (with
    the summed size) or, when no file was accepted, 'failed'. All queued
    statements are passed to jobtracker.query in one call.

    Input:
        request: A row from the requests table. The 'guid' and 'id'
            entries are read.
        files: NOTE(review): this argument is overwritten by the FTP
            listing before it is ever read, so any value passed in is
            ignored. Confirm whether that is intended.

    Outputs:
        None
    """
    ftp_error_msg = ("FTP error getting file information.\n"
                     "\tGUID: %s\n\tError: %s")
    # NOTE(review): values are interpolated directly into the SQL text,
    # so a file name containing a quote character would break the
    # statement.
    insert_sql = ("INSERT INTO files ( "
                  "request_id, "
                  "remote_filename, "
                  "filename, "
                  "status, "
                  "created_at, "
                  "updated_at, "
                  "size) "
                  "VALUES ('%s', '%s', '%s', '%s', '%s', '%s', %d)")
    downloading_sql = ("UPDATE requests "
                       "SET size=%d, "
                       "updated_at='%s', "
                       "status='downloading', "
                       "details='Request has been filled' "
                       "WHERE id=%d")
    failed_sql = ("UPDATE requests "
                  "SET updated_at='%s', "
                  "status='failed', "
                  "details='No files to download.' "
                  "WHERE id=%d")

    ftp_conn = CornellFTP.CornellFTP()
    try:
        files = ftp_conn.get_files(request['guid'])
    except CornellFTP.M2Crypto.ftpslib.error_perm:
        # A permission error from the FTP layer is logged and treated as
        # an empty listing, which leads to the 'failed' branch below.
        exc_type, exc_value = sys.exc_info()[:2]
        reason = "".join(
            traceback.format_exception_only(exc_type, exc_value)).strip()
        dlm_cout.outs(ftp_error_msg % (request['guid'], reason))
        files = []

    print("Create_file_entries : %s new files " % len(files))

    queries = []
    total_size = 0
    num_files = 0
    for position, (fn, size) in enumerate(files, 1):
        # Progress report on every tenth entry of the listing.
        if position % 10 == 0:
            print("%d / %d" % (position, len(files)))

        if pipeline_utils.can_add_file(fn, verbose=True):
            # Insert entry into DB's files table
            queries.append(insert_sql % (
                request['id'],
                fn,
                os.path.join(config.download.datadir, fn),
                'new',
                jobtracker.nowstr(),
                jobtracker.nowstr(),
                size))
            total_size += size
            num_files += 1
        else:
            dlm_cout.outs("Skipping %s" % fn)

    if not num_files:
        dlm_cout.outs("Request (GUID: %s) has failed.\n"
                      "\tThere are no files to be downloaded."
                      % request['guid'])

        # delete restore since there may be skipped files
        # (disabled in the original source; kept here for reference)
        #
        # web_service = CornellWebservice.Client()
        # delete_status = web_service.Deleter(guid=request['guid'], \
        #                     username=config.download.api_username, \
        #                     pw=config.download.api_password)
        # if delete_status == "deletion successful":
        #     dlm_cout.outs("Deletion (%s) succeeded." % request['guid'])
        # elif delete_status == "invalid user":
        #     dlm_cout.outs("Deletion (%s) failed due to invalid user." % \
        #                     request['guid'])
        # elif delete_status == "deletion failed":
        #     dlm_cout.outs("Deletion (%s) failed for unknown reasons." % \
        #                     request['guid'])

        # redefine 'queries' because there are no files to update
        queries = [failed_sql % (jobtracker.nowstr(), request['id'])]
    else:
        dlm_cout.outs("Request (GUID: %s) has succeeded.\n"
                      "\tNumber of files to be downloaded: %d"
                      % (request['guid'], num_files))
        queries.append(downloading_sql % (
            total_size, jobtracker.nowstr(), request['id']))

    jobtracker.query(queries)
def create_file_entries(request):
    """Register the files of a request in the job-tracker DB.

    The FTP server is asked for the files that belong to the request's
    GUID. One INSERT into the 'files' table is queued for every file
    accepted by pipeline_utils.can_add_file, and the request row is then
    marked 'finished' (with the summed size) or, when no file was
    accepted, 'failed'. All queued statements are passed to
    jobtracker.query in one call.

    Input:
        request: A row from the requests table. The 'guid' and 'id'
            entries are read.

    Outputs:
        None
    """
    # NOTE(review): values are interpolated directly into the SQL text,
    # so a file name containing a quote character would break the
    # statement.
    insert_sql = ("INSERT INTO files ( "
                  "request_id, "
                  "remote_filename, "
                  "filename, "
                  "status, "
                  "created_at, "
                  "updated_at, "
                  "size) "
                  "VALUES ('%s', '%s', '%s', '%s', '%s', '%s', %d)")
    finished_sql = ("UPDATE requests "
                    "SET size=%d, "
                    "updated_at='%s', "
                    "status='finished', "
                    "details='Request has been filled' "
                    "WHERE id=%d")
    failed_sql = ("UPDATE requests "
                  "SET updated_at='%s', "
                  "status='failed', "
                  "details='No files to download' "
                  "WHERE id=%d")

    ftp_conn = CornellFTP.CornellFTP()
    try:
        files = ftp_conn.get_files(request['guid'])
    except CornellFTP.M2Crypto.ftpslib.error_perm:
        # A permission error from the FTP layer is logged and treated as
        # an empty listing, which leads to the 'failed' branch below.
        exc_type, exc_value = sys.exc_info()[:2]
        reason = "".join(
            traceback.format_exception_only(exc_type, exc_value)).strip()
        dlm_cout.outs("FTP error getting file information.\n"
                      "\tGUID: %s\n\tError: %s"
                      % (request['guid'], reason))
        files = []

    queries = []
    total_size = 0
    num_files = 0
    for fn, size in files:
        if pipeline_utils.can_add_file(fn):
            # Insert entry into DB's files table
            queries.append(insert_sql % (
                request['id'],
                fn,
                os.path.join(config.download.datadir, fn),
                'new',
                jobtracker.nowstr(),
                jobtracker.nowstr(),
                size))
            total_size += size
            num_files += 1
        else:
            dlm_cout.outs("Skipping %s" % fn)

    if not num_files:
        dlm_cout.outs("Request (GUID: %s) has failed.\n"
                      "\tThere are no files to be downloaded."
                      % request['guid'])
        # redefine 'queries' because there are no files to update
        queries = [failed_sql % (jobtracker.nowstr(), request['id'])]
    else:
        dlm_cout.outs("Request (GUID: %s) has succeeded.\n"
                      "\tNumber of files to be downloaded: %d"
                      % (request['guid'], num_files))
        queries.append(finished_sql % (
            total_size, jobtracker.nowstr(), request['id']))

    jobtracker.query(queries)