Ejemplo n.º 1
0
def get_total_uploaded(table_name):
    '''Return the number of rows in table_name whose uploaded column is 1.

    table_name is interpolated directly into the SQL text, so it must be a
    trusted identifier. The count is taken from the first column of the
    single row produced by the COUNT(*) query.
    '''
    sql = "SELECT COUNT(*) FROM {0} WHERE uploaded=1".format(table_name)

    # A fresh connection is opened for each call; the result object returned
    # by execute_query is only used to fetch the one aggregate row.
    row = olrcdb.DatabaseConnection().execute_query(sql).fetchone()
    return row[0]
Ejemplo n.º 2
0
def get_min_id(table_name):
    '''Return the minimum id in table_name among rows where uploaded=0.

    table_name is interpolated directly into the SQL text, so it must be a
    trusted identifier.

    Returns the id as an int. If no row is pending (the query yields no row,
    or MIN(id) comes back as None), the process is terminated through
    sys.exit with a "Nothing to upload" message.
    '''

    query = "SELECT MIN(id) FROM {0} WHERE uploaded=0".format(table_name)

    connect = olrcdb.DatabaseConnection()
    result = connect.execute_query(query)
    result_tuple = result.fetchone()

    # Compare against None explicitly: a plain truthiness test would treat a
    # legitimate minimum id of 0 as "nothing to upload".
    if result_tuple is None or result_tuple[0] is None:
        sys.exit("Nothing to upload from table {0}".format(table_name))
    return int(result_tuple[0])
Ejemplo n.º 3
0
def upload_table(lock, range, table_name, counter, speed):
    '''
    Upload every path in table_name whose uploaded flag is 0, working through
    the ids one BATCH-sized window at a time.

    On each pass the id window [range.value, range.value + BATCH) is claimed
    while holding lock and range.value is advanced past it, so that other
    workers sharing the same range object pick up different windows. The
    loop stops once range.value is greater than TOTAL.

    Arguments:
    lock -- guards range.value and counter.value. It is used with the
        ``with`` statement, so it must be a context manager (presumably a
        multiprocessing lock; confirm against the caller).
    range -- shared object whose ``value`` attribute is the first id of the
        next unclaimed window. (The name shadows the builtin but is kept
        because it is part of the signature.)
    table_name -- table to read pending rows from; interpolated directly into
        the SQL text, so it must be a trusted identifier.
    counter -- shared object whose ``value`` attribute counts successful
        uploads.
    speed -- passed through unchanged to print_status.

    For each row, column 1 is handed to upload_file and column 0 to
    set_uploaded on success. A failed upload increments FAILED_COUNT and
    appends a line to '<table_name>.upload.error.log'.
    '''
    global FAILED_COUNT, BATCH

    connect = olrcdb.DatabaseConnection()

    while True:
        # Claim the next window while holding the lock so that no two workers
        # read the same range.value. ``with`` guarantees the lock is released
        # on every exit path, including the break and any exception.
        with lock:
            if range.value > TOTAL:
                break

            # Grab a "BATCH" worth of file paths to upload.
            query = ("SELECT * FROM {0} WHERE uploaded=0"
                     " AND id >= {1} AND id <{2}".format(table_name,
                                                         range.value,
                                                         range.value + BATCH))

            # Let other processes know this batch has been accounted for.
            range.value += BATCH

        # Fetch results outside the lock so other workers are not blocked
        # while this one talks to the database and uploads files.
        result = connect.execute_query(query)
        path_tuple = result.fetchone()

        # Loop until we run out of rows from the batch.
        while path_tuple:

            # If the upload is successful, update the database.
            if upload_file(path_tuple[1]):
                with lock:
                    counter.value += 1
                set_uploaded(path_tuple[0], table_name)

            else:
                FAILED_COUNT += 1
                # ``with`` closes the log even if the write itself fails.
                with open(table_name + '.upload.error.log', 'a') as error_log:
                    error_log.write("\rFailed: {0}\n".format(
                        path_tuple[1].encode('utf-8')))

            print_status(counter, lock, speed, table_name)

            path_tuple = result.fetchone()
Ejemplo n.º 4
0
def set_uploaded(id, table_name):
    '''Mark the row with the given id in table_name as uploaded.

    Runs an UPDATE that sets uploaded to '1' for the row whose id matches.
    Both arguments are interpolated directly into the SQL text, so they must
    come from a trusted source. Nothing is returned.
    '''
    statement = "UPDATE {0} set uploaded='1' WHERE id='{1}'".format(
        table_name, id)

    # A fresh connection is opened for every call.
    olrcdb.DatabaseConnection().execute_query(statement)
Ejemplo n.º 5
0
    # Check required environment variables have been set
    if not env_vars_set():
        set_env_message = "The following environment variables need to be " \
            "set:\n"
        set_env_message += " \n".join(REQUIRED_VARIABLES)
        set_env_message += "\nPlease set these environment variables to " \
            "connect to the OLRC."
        print(set_env_message)
        exit(0)

    #Open error log:
    error_log = open(table_name + '.prepare.error.log', 'w+')
    error_log.write("From execution {0}:\n".format(str(
        datetime.datetime.now())))
    error_log.close()

    connect = olrcdb.DatabaseConnection()
    connect.create_table(table_name)
    prepare_upload(connect, directory, table_name)

    sys.stdout.flush()
    sys.stdout.write("\r{0} parsed. ".format(COUNT))
    if FAILED != 0:
        sys.stdout.write("\n{0} FAILED. See error.log.".format(FAILED))

    #Log the final count
    final_count = open(table_name + ".prepare.out", 'w+')
    final_count.write("\r{0} parsed. ".format(COUNT))
    final_count.close()