Esempio n. 1
0
def create_jobs_for_new_files():
    """Create job entries for newly downloaded files.

    Query the job-tracker DB for files whose status is 'downloaded'
    or 'added' and that have no row in job_files yet, group the
    files that belong together, and for every complete group insert
    one row into the jobs table plus the job_files rows linking the
    group's files to that job.

    Progress messages are printed to stdout along the way. Nothing
    is returned.
    """
    # Get files that aren't already associated with a job
    rows = jobtracker.query("SELECT filename FROM files " \
                            "LEFT JOIN job_files " \
                                "ON job_files.file_id=files.id " \
                            "WHERE files.status IN ('downloaded', 'added') " \
                                "AND job_files.id IS NULL")
    # NOTE: print("...") with a single string behaves the same whether
    # or not print is a statement, so these lines parse on Python 2 and 3.
    print("selecting new entries.")
    newfns = [str(row['filename']) for row in rows]

    # Group together files that belong together
    groups = datafile.group_files(newfns)
    print("grouping entries.")

    # Keep only groups that are not missing any files
    complete_groups = [grp for grp in groups if datafile.is_complete(grp)]

    print("complete group entries.")
    if complete_groups:
        jobpool_cout.outs("Inserting %d new entries into jobs table" % \
                            len(complete_groups))
        print("Inserting entries.")
    for complete in complete_groups:
        # Use a single timestamp per job so that created_at and
        # updated_at agree across both inserted rows.
        now = jobtracker.nowstr()

        # The filenames are embedded as SQL string literals; double any
        # single quote so a quote in a filename cannot terminate the
        # literal early and corrupt the statement.
        quoted_fns = "', '".join([fn.replace("'", "''") for fn in complete])

        # Insert new job and link it to data files. The second statement
        # takes job_id from LAST_INSERT_ROWID(), i.e. it expects to run
        # right after the jobs insert (presumably on the same connection,
        # which is why both are handed to jobtracker.query as one list).
        queries = []
        queries.append("INSERT INTO jobs (" \
                            "created_at, " \
                            "details, " \
                            "status, " \
                            "updated_at) " \
                       "VALUES ('%s', '%s', '%s', '%s')" % \
                        (now, 'Newly created job', 'new', now))
        queries.append("INSERT INTO job_files (" \
                            "file_id, " \
                            "created_at, " \
                            "job_id, " \
                            "updated_at) " \
                       "SELECT id, '%s', (SELECT LAST_INSERT_ROWID()), '%s' " \
                       "FROM files " \
                       "WHERE filename IN ('%s')" % \
                       (now, now, quoted_fns))
        jobtracker.query(queries)
Esempio n. 2
0
def create_jobs_for_new_files():
    """Create job entries for newly downloaded files.

    Look up files in the job-tracker DB whose status is 'downloaded'
    or 'added' and that are not yet referenced from job_files, group
    the files that belong together, and for each complete group
    insert a new row into jobs together with the job_files rows that
    attach the group's files to it.

    Nothing is returned; the only outputs are the DB inserts and a
    summary message sent through jobpool_cout.
    """
    # Get files that aren't already associated with a job
    rows = jobtracker.query(
        "SELECT filename FROM files "
        "LEFT JOIN job_files "
        "ON job_files.file_id=files.id "
        "WHERE files.status IN ('downloaded', 'added') "
        "AND job_files.id IS NULL"
    )
    newfns = [str(row["filename"]) for row in rows]

    # Group together files that belong together
    groups = datafile.group_files(newfns)

    # Keep only groups that are not missing any files
    complete_groups = [grp for grp in groups if datafile.is_complete(grp)]

    if complete_groups:
        jobpool_cout.outs("Inserting %d new entries into jobs table" % len(complete_groups))

    insert_job_sql = (
        "INSERT INTO jobs ("
        "created_at, "
        "details, "
        "status, "
        "updated_at) "
        "VALUES ('%s', '%s', '%s', '%s')"
    )
    # job_id comes from LAST_INSERT_ROWID(), so this statement expects to
    # run directly after the jobs insert (presumably on the same
    # connection; both are passed to jobtracker.query as one list).
    link_files_sql = (
        "INSERT INTO job_files ("
        "file_id, "
        "created_at, "
        "job_id, "
        "updated_at) "
        "SELECT id, '%s', (SELECT LAST_INSERT_ROWID()), '%s' "
        "FROM files "
        "WHERE filename IN ('%s')"
    )

    for complete in complete_groups:
        # One timestamp per job keeps created_at and updated_at identical
        # on the freshly inserted rows.
        stamp = jobtracker.nowstr()

        # Filenames become SQL string literals: double embedded single
        # quotes so they cannot close the literal and break the query.
        name_list = "', '".join([fn.replace("'", "''") for fn in complete])

        # Insert new job and link it to data files
        queries = [
            insert_job_sql % (stamp, "Newly created job", "new", stamp),
            link_files_sql % (stamp, stamp, name_list),
        ]
        jobtracker.query(queries)