예제 #1
0
def create_jobs_for_new_files():
    """Create job entries for newly downloaded files.

    Queries the job-tracker DB for files whose status is 'downloaded' or
    'added' and that have no row in ``job_files`` yet, groups them with
    ``datafile.simple_group_files``, and keeps only the groups accepted by
    ``SPAN512_job.is_complete``. For every such group one row is inserted
    into ``jobs`` (status 'new', task 'rfifind') together with one
    ``job_files`` row per file that links the file to the new job.

    Returns:
        None. The effect of the call is the rows written to the database.
    """
    # Get files that were not associated with a job yet
    rows = jobtracker.query(
        "SELECT filename FROM files "
        "LEFT JOIN job_files "
        "ON job_files.file_id=files.id "
        "WHERE files.status IN ('downloaded', 'added') "
        "AND job_files.id IS NULL"
    )
    newfns = [str(row["filename"]) for row in rows]

    # Group together files that belong together
    groups = datafile.simple_group_files(newfns)

    # Keep only groups that are not missing any files
    complete_groups = [grp for grp in groups if SPAN512_job.is_complete(grp)]

    if not complete_groups:
        # Nothing to insert
        return

    jobpool_cout.outs("Inserting %d new entries into jobs table" % len(complete_groups))

    # Label the first task
    task_name = "rfifind"

    for complete in complete_groups:
        # Take a single timestamp per job so created_at/updated_at agree on
        # the jobs row and on all of its job_files rows.
        now = jobtracker.nowstr()

        # Double any single quote so a filename cannot terminate the SQL
        # string literal early. Filenames without quotes produce exactly the
        # same SQL text as before.
        # NOTE(review): bound parameters would be preferable if
        # jobtracker.query supports them - confirm.
        quoted_fns = "', '".join(fn.replace("'", "''") for fn in complete)

        # Insert new job and link it to data files
        queries = [
            "INSERT INTO jobs ("
            "created_at, "
            "details, "
            "status, "
            "task, "
            "updated_at) "
            "VALUES ('%s', '%s', '%s', '%s', '%s')"
            % (now, "Newly created job", "new", task_name, now),
            # LAST_INSERT_ID() must resolve to the jobs row inserted just
            # above, so both statements are handed to jobtracker.query in one
            # call (presumably run on the same connection - confirm).
            "INSERT INTO job_files ("
            "file_id, "
            "created_at, "
            "job_id, "
            "updated_at) "
            "SELECT id, '%s', (SELECT LAST_INSERT_ID()), '%s' "
            "FROM files "
            "WHERE filename IN ('%s')" % (now, now, quoted_fns),
        ]
        jobtracker.query(queries)