def get_template_from_id(template_id, existdb=None, verify_md5=True):
    """Return the path to the template file that has the given ID number.
        Optionally double check the file's MD5 sum, to make sure
        nothing strange has happened.

        Inputs:
            template_id: The ID number of the template to get a path for.
            existdb: A (optional) existing database connection object.
                (Default: Establish a db connection)
            verify_md5: If True, double check the file's MD5 sum.
                (Default: Perform MD5 check.)

        Output:
            fn: The full file path.

        Raises:
            errors.InconsistentDatabaseError: If the number of rows in
                the templates table with this template_id is not
                exactly one.
            errors.FileError: If 'verify_md5' is True and the MD5 sum
                stored in the DB differs from the one computed from
                the file.
    """
    notify.print_info("Looking-up template with ID=%d" % template_id, 2)

    # Use the existing DB connection, or open a new one if None was provided
    db = existdb or database.Database()
    db.connect()
    try:
        select = db.select([db.templates.c.filename,
                            db.templates.c.filepath,
                            db.templates.c.md5sum]).\
            where(db.templates.c.template_id == template_id)
        result = db.execute(select)
        try:
            rows = result.fetchall()
        finally:
            result.close()
    finally:
        if not existdb:
            # Close the DB connection we opened, even if the query failed
            db.close()

    if len(rows) != 1:
        raise errors.InconsistentDatabaseError("Bad number of files (%d) "
                                               "with template_id=%d" %
                                               (len(rows), template_id))
    filename = rows[0]['filename']
    filepath = rows[0]['filepath']
    md5sum_db = rows[0]['md5sum']

    fullpath = os.path.join(filepath, filename)
    # Make sure the file exists
    datafile.verify_file_path(fullpath)
    if verify_md5:
        notify.print_info("Confirming MD5 sum of %s matches what is "
                          "stored in DB (%s)" % (fullpath, md5sum_db), 2)

        md5sum_file = datafile.get_md5sum(fullpath)
        if md5sum_db != md5sum_file:
            raise errors.FileError("md5sum check of %s failed! MD5 from "
                                   "DB (%s) != MD5 from file (%s)" %
                                   (fullpath, md5sum_db, md5sum_file))
    return fullpath
def populate_parfiles_table(db, fn, params):
    """Register a parfile in the 'parfiles' table, unless a parfile
        with the same MD5 sum is already there.

        Inputs:
            db: The database object used to begin/commit/roll back the
                transaction and to run the queries.
            fn: The path to the parfile.
            params: A dictionary of additional column values for the
                new row. It must contain 'pulsar_id'. Its entries
                are applied on top of the md5sum/filename/filepath
                values computed here.

        Output:
            parfile_id: The ID of the newly inserted row, or of the
                already-registered parfile with the same MD5 sum.

        Raises:
            errors.InconsistentDatabaseError: If more than one parfile
                in the DB has this MD5 sum, or if the single match
                belongs to a different pulsar.
    """
    checksum = datafile.get_md5sum(fn)
    path, fn = os.path.split(os.path.abspath(fn))

    db.begin()  # Everything below happens inside one transaction

    # Look for parfiles already registered with the same MD5 sum
    query = db.select([db.parfiles.c.parfile_id,
                       db.parfiles.c.pulsar_id],
                      db.parfiles.c.md5sum == checksum)
    cursor = db.execute(query)
    matches = cursor.fetchall()
    cursor.close()

    nmatches = len(matches)
    if nmatches > 1:
        db.rollback()
        raise errors.InconsistentDatabaseError("There are %d parfiles "
                                               "with MD5 (%s) in the "
                                               "database already" %
                                               (nmatches, checksum))

    if nmatches == 1:
        parfile_id, psr_id = matches[0]
        if psr_id != params['pulsar_id']:
            db.rollback()
            raise errors.InconsistentDatabaseError("A parfile with this "
                                                   "MD5 (%s) already exists "
                                                   "in the DB, but for "
                                                   "a different pulsar "
                                                   "(ID: %d)!" %
                                                   (checksum, psr_id))
        # Same file, same pulsar: warn and re-use the existing entry
        warnings.warn("A parfile with this MD5 (%s) already exists "
                      "in the DB for this pulsar (ID: %d). "
                      "The file will not be re-registed into the DB. "
                      "Doing nothing..." % (checksum, psr_id),
                      errors.ToasterWarning)
    else:
        # No parfile with this MD5 sum is registered yet, so insert it
        values = dict(md5sum=checksum, filename=fn, filepath=path)
        values.update(params)
        inserted = db.execute(db.parfiles.insert(), values)
        parfile_id = inserted.inserted_primary_key[0]
        inserted.close()

    db.commit()
    return parfile_id
def get_template_id(template, existdb=None):
    """Given a template file path find its template_id number.

        Inputs:
            template: the path to a template file.
            existdb: A (optional) existing database connection object.
                (Default: Establish a db connection)

        Output:
            template_id: the corresponding template_id value.

        Raises:
            errors.FileError: If exactly one template in the DB matches,
                but only by md5sum or only by filename.
            errors.ToasterError: If no template in the DB matches.
            errors.InconsistentDatabaseError: If multiple templates
                match, or the single match agrees on neither md5sum
                nor filename.
    """
    # Use the existing DB connection, or open a new one if None was provided
    db = existdb or database.Database()
    db.connect()
    try:
        notify.print_info("Getting template ID for %s using "
                          "filename and md5sum" % template, 2)
        fn = os.path.basename(os.path.abspath(template))
        md5sum = datafile.get_md5sum(template)
        select = db.select([db.templates.c.template_id,
                            db.templates.c.filename,
                            db.templates.c.md5sum]).\
            where((db.templates.c.md5sum == md5sum) |
                  (db.templates.c.filename == fn))
        result = db.execute(select)
        try:
            rows = result.fetchall()
        finally:
            result.close()
    finally:
        if not existdb:
            # Close the DB connection we opened, even on failure
            db.close()

    if len(rows) == 1:
        row = rows[0]
        if row['md5sum'] == md5sum and row['filename'] == fn:
            return row['template_id']
        elif row['md5sum'] == md5sum:
            raise errors.FileError("A template (template_id=%d) with "
                                   "this md5sum, but a different filename "
                                   "exists in the DB." % row['template_id'])
        elif row['filename'] == fn:
            raise errors.FileError("A template (template_id=%d) with "
                                   "this filename, but a different md5sum "
                                   "exists in the DB." % row['template_id'])
        else:
            # The query only selects template_id, filename and md5sum,
            # so the filename must be read from the 'filename' column.
            raise errors.InconsistentDatabaseError("A template "
                                                   "(template_id=%d) "
                                                   "matches our query, but "
                                                   "neither its md5sum (%s), "
                                                   "nor its filename (%s) "
                                                   "appears to match! "
                                                   "This should never "
                                                   "happen!" %
                                                   (row['template_id'],
                                                    row['md5sum'],
                                                    row['filename']))
    elif len(rows) == 0:
        raise errors.ToasterError("Input template (%s) does not appear "
                                  "to be registered in the DB! "
                                  "Use 'load_template.py' to load "
                                  "it into the DB." % template)
    else:
        raise errors.InconsistentDatabaseError("Multiple (%s) templates "
                                               "match the given file name "
                                               "or md5sum!" % len(rows))
def populate_templates_table(db, fn, params, comments):
    """Register a template in the 'templates' table, unless a template
        with the same MD5 sum is already there.

        Inputs:
            db: The database object used to begin/commit/roll back the
                transaction and to run the queries.
            fn: The path to the template file.
            params: A dictionary that must contain 'user_id',
                'pulsar_id', 'obssystem_id' and 'nbin'.
            comments: The comment to store with the template.
                Must not be None.

        Output:
            template_id: The ID of the newly inserted row, or of the
                already-registered template with the same MD5 sum.

        Raises:
            errors.BadInputError: If 'comments' is None.
            errors.InconsistentDatabaseError: If more than one template
                in the DB has this MD5 sum, or if the single match
                belongs to a different pulsar.
    """
    if comments is None:
        raise errors.BadInputError("A comment is required for every "
                                   "template!")

    checksum = datafile.get_md5sum(fn)
    path, fn = os.path.split(os.path.abspath(fn))

    db.begin()  # Everything below happens inside one transaction

    # Look for templates already registered with the same MD5 sum
    query = db.select([db.templates.c.template_id,
                       db.templates.c.pulsar_id]).\
        where(db.templates.c.md5sum == checksum)
    cursor = db.execute(query)
    matches = cursor.fetchall()
    cursor.close()

    nmatches = len(matches)
    if nmatches > 1:
        db.rollback()
        raise errors.InconsistentDatabaseError("There are %d templates "
                                               "with MD5 (%s) in the "
                                               "database already" %
                                               (nmatches, checksum))

    if nmatches == 1:
        psr_id = matches[0]['pulsar_id']
        template_id = matches[0]['template_id']
        if psr_id != params['pulsar_id']:
            db.rollback()
            raise errors.InconsistentDatabaseError("A template with this "
                                                   "MD5 (%s) already exists "
                                                   "in the DB, but for "
                                                   "a different pulsar "
                                                   "(ID: %d)!" %
                                                   (checksum, psr_id))
        # Same file, same pulsar: close the transaction, then warn
        db.commit()
        warnings.warn("A template with this MD5 (%s) already exists "
                      "in the DB for this pulsar (ID: %d). "
                      "The file will not be re-registed into the DB. "
                      "Doing nothing..." % (checksum, psr_id),
                      errors.ToasterWarning)
        return template_id

    # No template with this MD5 sum is registered yet.
    # Warn if this pulsar/observing system pair already has templates.
    query = db.select([db.templates.c.template_id]).\
        where((db.templates.c.pulsar_id == params['pulsar_id']) &
              (db.templates.c.obssystem_id == params['obssystem_id']))
    cursor = db.execute(query)
    siblings = cursor.fetchall()
    cursor.close()
    if siblings:
        warnings.warn("This pulsar_id (%d), obssystem_id (%d) "
                      "combination already has %d templates in the DB. "
                      "Be sure to correctly set the master template."
                      % (params['pulsar_id'], params['obssystem_id'],
                         len(siblings)))

    # Insert the new template
    values = {'md5sum': checksum,
              'filename': fn,
              'filepath': path,
              'user_id': params['user_id'],
              'pulsar_id': params['pulsar_id'],
              'obssystem_id': params['obssystem_id'],
              'nbin': params['nbin'],
              'comments': comments}
    inserted = db.execute(db.templates.insert(), values)
    template_id = inserted.inserted_primary_key[0]
    inserted.close()

    db.commit()
    return template_id
def populate_rawfiles_table(db, archivefn, params): # md5sum helper function in utils md5 = datafile.get_md5sum(archivefn) path, fn = os.path.split(os.path.abspath(archivefn)) size = os.path.getsize(archivefn) # File size in bytes trans = db.begin() # Does this file exist already? select = db.select([db.rawfiles.c.rawfile_id, db.rawfiles.c.pulsar_id]).\ where(db.rawfiles.c.md5sum == md5) result = db.execute(select) rows = result.fetchall() result.close() if len(rows) > 1: trans.rollback() raise errors.InconsistentDatabaseError("There are %d rawfiles " "with MD5 (%s) in the " "database already" % (len(rows), md5)) elif len(rows) == 1: rawfile_id = rows[0]['rawfile_id'] psr_id = rows[0]['pulsar_id'] if psr_id == params['pulsar_id']: warnings.warn("A rawfile with this MD5 (%s) already exists " "in the DB for this pulsar (ID: %d). " "The file will not be re-registed into the DB. " "Doing nothing..." % (md5, psr_id), errors.ToasterWarning) trans.commit() return rawfile_id else: trans.rollback() raise errors.InconsistentDatabaseError("A rawfile with this " "MD5 (%s) already exists " "in the DB, but for " "a different pulsar " "(ID: %d)!" % (md5, psr_id)) else: notify.print_info("Inserting rawfile (%s) into DB." % fn, 3) # Based on its MD5, this rawfile doesn't already # exist in the DB. Insert it. # Insert the file ins = db.rawfiles.insert() values = {'md5sum': md5, 'filename': fn, 'filepath': path, 'filesize': size, 'coord': '%s,%s' % (params['ra'], params['dec'])} values.update(params) result = db.execute(ins, values) rawfile_id = result.inserted_primary_key[0] result.close() # Create rawfile diagnostics diags = [] for diagname in config.cfg.default_rawfile_diagnostics: diagcls = diagnostics.get_diagnostic_class(diagname) try: diags.append(diagcls(archivefn)) except errors.DiagnosticNotApplicable, e: notify.print_info("Diagnostic isn't applicable: %s. " "Skipping..." 
% str(e), 1) if diags: # Load processing diagnostics diagnose_rawfile.insert_rawfile_diagnostics(rawfile_id, diags, existdb=db)