def create_jobs_for_new_files():
    """Check job-tracker DB for newly downloaded files and create jobs.

    Selects the filenames of files whose status is 'downloaded' or 'added'
    and that have no row in job_files, groups them with
    datafile.group_files(), and keeps only the groups for which
    datafile.is_complete() is true.  For every such group one row is
    inserted into the jobs table (status 'new') and job_files rows are
    inserted for the files named in the group, linking them to that job.

    Progress messages are printed to stdout along the way.  Returns None.
    """
    # Get files that aren't already associated with a job
    rows = jobtracker.query("SELECT filename FROM files "
                            "LEFT JOIN job_files "
                            "ON job_files.file_id=files.id "
                            "WHERE files.status IN ('downloaded', 'added') "
                            "AND job_files.id IS NULL")
    # A single-string print(...) emits the same text whether print is a
    # statement (Python 2) or a function (Python 3).
    print("selecting new entries.")
    newfns = [str(row['filename']) for row in rows]

    # Group together files that belong together
    groups = datafile.group_files(newfns)
    print("grouping entries.")

    # Keep only groups that are not missing any files
    complete_groups = [grp for grp in groups if datafile.is_complete(grp)]
    print("complete group entries.")

    if complete_groups:
        jobpool_cout.outs("Inserting %d new entries into jobs table" %
                          len(complete_groups))
        # NOTE(review): assumed to belong to this branch (only printed when
        # there is something to insert) -- confirm against the intended layout.
        print("Inserting entries.")

    for complete in complete_groups:
        # Take the timestamp once per job so created_at and updated_at agree
        # on both the jobs row and its job_files rows.
        now = jobtracker.nowstr()

        # Filenames are embedded in a SQL string literal, so double any
        # single quote; otherwise a name containing "'" ends the literal
        # early and corrupts the statement.
        fn_list = "', '".join(fn.replace("'", "''") for fn in complete)

        # Insert new job and link it to data files.  The second statement
        # uses LAST_INSERT_ROWID() to pick up the id of the jobs row inserted
        # by the first, so both are handed to jobtracker.query() together
        # (presumably run in order on one connection -- TODO confirm).
        queries = [
            "INSERT INTO jobs ("
            "created_at, "
            "details, "
            "status, "
            "updated_at) "
            "VALUES ('%s', '%s', '%s', '%s')" %
            (now, 'Newly created job', 'new', now),
            "INSERT INTO job_files ("
            "file_id, "
            "created_at, "
            "job_id, "
            "updated_at) "
            "SELECT id, '%s', (SELECT LAST_INSERT_ROWID()), '%s' "
            "FROM files "
            "WHERE filename IN ('%s')" %
            (now, now, fn_list),
        ]
        jobtracker.query(queries)
def create_jobs_for_new_files():
    """Create job entries for newly downloaded, not-yet-assigned files.

    Steps:
      1. Query the job-tracker DB for filenames of files with status
         'downloaded' or 'added' that have no matching job_files row.
      2. Group the filenames with datafile.group_files() and discard every
         group that datafile.is_complete() rejects.
      3. For each remaining group, insert one 'new' row into jobs and link
         the group's files to it through job_files.

    Returns None.  Nothing is written or reported when no complete group
    is found.
    """
    # Files that aren't already associated with a job.
    unassigned_sql = (
        "SELECT filename FROM files "
        "LEFT JOIN job_files "
        "ON job_files.file_id=files.id "
        "WHERE files.status IN ('downloaded', 'added') "
        "AND job_files.id IS NULL"
    )
    newfns = [str(row["filename"]) for row in jobtracker.query(unassigned_sql)]

    # Group together files that belong together, then keep only the groups
    # that are not missing any files.
    complete_groups = [
        grp for grp in datafile.group_files(newfns) if datafile.is_complete(grp)
    ]
    if not complete_groups:
        return

    jobpool_cout.outs(
        "Inserting %d new entries into jobs table" % len(complete_groups)
    )

    job_sql = (
        "INSERT INTO jobs ("
        "created_at, "
        "details, "
        "status, "
        "updated_at) "
        "VALUES ('%s', '%s', '%s', '%s')"
    )
    # LAST_INSERT_ROWID() refers to the jobs row inserted just before this
    # statement, which is why the two statements are submitted as one list
    # (presumably executed in order on one connection -- TODO confirm).
    link_sql = (
        "INSERT INTO job_files ("
        "file_id, "
        "created_at, "
        "job_id, "
        "updated_at) "
        "SELECT id, '%s', (SELECT LAST_INSERT_ROWID()), '%s' "
        "FROM files "
        "WHERE filename IN ('%s')"
    )

    for complete in complete_groups:
        # One timestamp per job keeps created_at and updated_at identical
        # across the jobs row and its job_files rows.
        stamp = jobtracker.nowstr()

        # Double embedded single quotes so a filename containing "'" cannot
        # terminate the SQL string literal early.
        escaped = [fn.replace("'", "''") for fn in complete]

        jobtracker.query(
            [
                job_sql % (stamp, "Newly created job", "new", stamp),
                link_sql % (stamp, stamp, "', '".join(escaped)),
            ]
        )